#delim cr
set more off
set varabbrev off
pause on
graph set ps logo off

capture log close
set linesize 255
set logtype text
log using ../log/study-pollution-in-daily-data.log , replace

/* --------------------------------------

AUTHOR: Tal Gross

PURPOSE: Look at how air-quality affects
the workers at CTRIP.

DATE CREATED: June 12, 2014

NOTES:

--------------------------------------- */

clear all
estimates clear
set matsize 10000
describe, short

************************************************************
**   Programs Used Below                          
************************************************************

capture program drop tabresults
program tabresults
	disp "Formatted Tables:"
	estout * , keep( `1' ) ///
	stats(r2 N, fmt(%9.6f %9.0g)) ///   
	cells(b(star fmt(%9.4f)) se(fmt(%9.4f) par([ ]) nostar) ) ///
	style(fixed) replace type starlevels(* 0.05 ** 0.01)

	estimates clear
end

capture program drop make_weather_bins
program make_weather_bins

	disp "Temperature range in Shanghai:"
	sum ltemp if shanghai_prod_sample == 1
	disp "Temperature range in Nantong:"
	sum ltemp if nantong_prod_sample == 1
	** Create temperature bins interacted with city indicators...
	gen ltemp_m_40 = (ltemp >= -100 & ltemp < 40)
	gen ltemp_40_50 = (ltemp >= 40 & ltemp < 50)
	gen ltemp_50_60 = (ltemp >= 50 & ltemp < 60)
	gen ltemp_60_70 = (ltemp >= 60 & ltemp < 70)
	gen ltemp_70_80 = (ltemp >= 70 & ltemp < 80)
	gen ltemp_80_p = (ltemp >= 80 & ltemp < .)

	foreach var in ltemp_m_40 ltemp_40_50 ltemp_50_60 ltemp_60_70 ltemp_70_80 ltemp_80_p {
		gen sh_`var' = shanghai_prod_sample * `var'
		gen na_`var' = nantong_prod_sample * `var'
	}
end

************************************************************
**   Bring in all our weather data
************************************************************

use ../dta/all-our-environmental-data.dta , clear

d, f

keep Date ///
nantong_temp nantong_shuang_api_xx nantong_shuang_api_pm10 ///
precipitation tmax_cel tmin_cel tmax_fah tmin_fah visib temp dewp slp stp wdsp mxspd prcp frshtt 

tempfile weather
save `weather'

************************************************************
**   Bring in the key pollutants
************************************************************

use ../dta/imputed-pollution-measures.dta , clear

d, f

tempfile our_pollutants
save `our_pollutants'

************************************************************
**   Bring in the CTRIP data
************************************************************

use ../dta/ctrip-cleaned-daily-records.dta , clear

d, f
sum

************************************************************
**   Clean outcomes
************************************************************

sum LogInLengthsecond CallLengthsecond NumCall

gen calls_per_minute = NumCall / (LogInLengthsecond / 60)
gen log_calls_per_minute = log(calls_per_minute)
sum log_calls_per_minute calls_per_minute

gen calls_per_phone_minutes = NumCall / (CallLengthsecond / 60)
gen log_calls_per_phone_minutes = log(calls_per_phone_minutes)

gen log_ph_min_per_call = log( CallLengthsecond / NumCall )
sum log_ph_min_per_call

gen minutes_on_phone = CallLengthsecond / 60
gen log_minutes_on_phone = log(CallLengthsecond / 60)
gen log_minutes_logged = log(LogInLengthsecond / 60)
sum log_minutes_logged LogInLengthsecond

gen log_NumCall = log(NumCall)
sum log_NumCall NumCall

gen log_waiting_time = log(LogInLengthsecond - CallLengthsecond)
sum log_waiting_time

gen waiting_ratio = (LogInLengthsecond - CallLengthsecond) / LogInLengthsecond 
gen log_waiting_ratio = log(waiting_ratio)
sum waiting_ratio log_waiting_ratio
centile waiting_ratio , c(1 5 10 20 30 40 50 60 70 80 90 95 99)

** We label workers as late if they show up after 10 AM
gen byte late_to_work = (hour1 >= 10) 
replace late_to_work = . if hour1 == .
sum late_to_work

gen log_grosswage = log(grosswage)
sum log_grosswage grosswage

** New outcomes to capture mechanical effect
gen log_minphone_minlog = log( CallLengthsecond / LogInLengthsecond )
gen log_calls_minlog = log( NumCall / LogInLengthsecond)
sum log_*

************************************************************
**   Describe variation in hotel versus flight
************************************************************

** We were told that workers are permanently put on a "flight"
** or "hotel" team. But there appears to be some switching.
preserve

	** RESTRICTIONS
	** only controls and chose-not-to-volunteer groups
	** everything before end of experiment
	tab expgroup
	*keep if expgroup == 0 | expgroup == 1 | expgroup == 2

	collapse (mean) hotel (first) expgroup, by(EmployeeID) fast

	count if hotel != 0 & hotel != 1 & hotel != . & expgroup == 0
	count if hotel != 0 & hotel != 1 & hotel != . & expgroup == 1
	count if hotel != 0 & hotel != 1 & hotel != . & expgroup == 2
	count if hotel != 0 & hotel != 1 & hotel != . & expgroup == .

	sum hotel
	centile hotel , c(1 2 3 4 5 10 20 30 40 50 60 70 80 90 95 96 97 98 99)
restore

** We create a variable to mark those who switch 
gen byte flight = 1 - hotel
egen max_hotel = max(hotel) , by(EmployeeID)
egen max_flight = max(flight) , by(EmployeeID)
gen byte hotel_flight_switcher = (max_hotel == max_flight == 1)
sum max_hotel max_flight hotel_flight_switcher
table expgroup , c(mean hotel_flight_switcher)
drop max_hotel max_flight flight

************************************************************
**   Clean the time-clock variables
************************************************************

** Note that, for a sub-sample of observations, we have time-card
** data. We study those outcomes here.
gen timeclock_hours = (hour2 - hour1) + ((minute2 - minute1) / 60)
sum timeclock_hours

gen byte timeclock_hours_missing = (timeclock_hours == .)

table expgroup , c(mean timeclock_hours_missing)

sum timeclock_hours* 

sum timeclock_hours* 
table yearmonth , c(mean timeclock_hours N timeclock_hours)
table expgroup , c(mean timeclock_hours N timeclock_hours )

** The time-clock variables are problematic for several reasons.
** Many observations seem to have longer logged-in times than 
** time-clock hours
gen hours_logged_in = LogInLengthsecond / 60 / 60
compare timeclock_hours hours_logged_in
centile timeclock_hours hours_logged_in, c(10 20 30 40 50 60 70 80 90 95 99)

************************************************************
**   Merge CTRIP data to weather and pollution
************************************************************

merge m:1 Date using `weather'
drop _merge 

merge m:1 Date using `our_pollutants'
drop _merge 

************************************************************
**   Describe minutes logged in versus NumCall
************************************************************

** There appear to be days when there are no calls but many minutes
** logged in.
preserve

	** RESTRICTIONS
	** only controls and chose-not-to-volunteer groups
	** everything before end of experiment
	keep if expgroup == 0 | expgroup == 2
	keep if Date <= mdy(8, 14, 2011) 

	sum NumCall CallLengthsecond LogInLengthsecond 
	count if NumCall == . & LogInLengthsecond != .
	count if NumCall == 0 & LogInLengthsecond != 0

	qui reg NumCall junjie_api_xx hotel tmax_fah precipitation 
	disp "Number of observations: `e(N)'"
	gen in_numcall_sample = e(sample)
	qui reg LogInLengthsecond junjie_api_xx hotel tmax_fah precipitation 
	disp "Number of observations: `e(N)'"
	gen in_login_sample = e(sample)

	tab in_numcall_sample in_login_sample
	tab Date if in_numcall_sample == 0 & in_login_sample == 1
	tab EmployeeID if in_numcall_sample == 0 & in_login_sample == 1

restore

************************************************************
**   Clean the environmental variables
************************************************************

** Harmonize temperature across cities, so both are in Fahrenheit
sum nantong_temp tmax_fah
replace nantong_temp = 32 + (9 / 5) * nantong_temp
sum nantong_temp tmax_fah

** Squared temperature
gen tmax_fah2 = tmax_fah ^ 2
gen nantong_temp2 = nantong_temp ^ 2

sum junjie_api_xx pm10 pm25 pm25_imp
table yearmonth , c(mean junjie_api_xx mean pm10 mean pm25 mean pm25_imp)

** NOTE: To keep pollution point estimates interpretable, we divide by 10 here
sum junjie_api_xx pm10 pm25 pm25_imp
foreach var in junjie_api_xx pm10 pm25 pm25_imp nantong_shuang_api_pm10 nantong_shuang_api_xx {
	replace `var' = `var' / 10
}
sum junjie_api_xx pm10 pm25 pm25_imp

************************************************************
**   Clean commute variable
************************************************************

sum commute
sum commute , det

gen byte commute90 = (commute >= 90 & commute < .)
gen byte commute120 = (commute >= 120 & commute < .)

bysort EmployeeID: gen byte first_ob = (_n == 1)
centile commute if first_ob == 1 & commute != ., c(10 25 33 50 66 75 90)
centile commute if first_ob == 1 & commute != ., c(33 66)
local lower = r(c_1)
local upper = r(c_2)
gen commute_tercile = 1 * (commute >= 0 & commute < `lower') + 2 * (commute >= `lower' & commute < `upper') + 3 * (commute >= `upper' & commute < .)
drop first_ob
gen byte commute_tercile1 = (commute_tercile == 1)
gen byte commute_tercile2 = (commute_tercile == 2)
gen byte commute_tercile3 = (commute_tercile == 3)

************************************************************
**   Adjustment
************************************************************

replace nantong_shuang_api_pm10 = nantong_shuang_api_xx if nantong_shuang_api_pm10==.

************************************************************
**   Create leads and lags of the pollution variables
************************************************************

preserve

	bysort Date: keep if _n == 1

	sort Date

	list Date junjie_api_xx in 1/50

	foreach pollutant in junjie_api_xx pm10 pm25 pm25_imp nantong_shuang_api_pm10 {
		forvalues i = 1/5 {
			gen `pollutant'_m`i' = `pollutant'[_n - `i']
			gen `pollutant'_p`i' = `pollutant'[_n + `i']

			** If there's a gap in the dates, then we replace the lead or 
			** lag to be missing.
			replace `pollutant'_m`i' = . if Date[_n - `i'] != Date - `i' 
			replace `pollutant'_p`i' = . if Date[_n + `i'] != Date + `i' 
		}
	}

	list Date junjie_api_xx junjie_api_xx_p1 junjie_api_xx_m1 junjie_api_xx_m2 in 1/50

	keep Date *_m1 *_m2 *_m3 *_m4 *_m5 *_p1 *_p2 *_p3 *_p4 *_p5

	tempfile lead_lags
	save `lead_lags'
restore

merge m:1 Date using `lead_lags' 
drop _merge

************************************************************
**   Restrictions
************************************************************

drop if hotel_flight_switcher

gen byte problematic_calls = (NumCall == . | NumCall == 0)
egen problem_share = mean(problematic_calls) , by(EmployeeID)
sum problem_share
sum problem_share , det
drop problematic_calls

drop if NumCall == 0 | NumCall == .

************************************************************
**   Create identifiers for a clean sample
************************************************************

gen byte shanghai_prod_sample = 0
replace shanghai_prod_sample = 1 if (expgroup == 0 | expgroup == 1 | expgroup == 2) & (Date <= mdy(8, 14, 2011) ) 
replace shanghai_prod_sample = 0 if minutes_on_phone == 0 | junjie_api_xx == . | LogInLengthsecond == . | LogInLengthsecond == 0 | NumCall == .
count if shanghai_prod_sample == 1

gen byte shanghai_home_exp_sample = 0
replace shanghai_home_exp_sample = 1 if (Date >= mdy(12, 6, 2010) & Date <= mdy(8, 14, 2011) ) & (expgroup == 0 | expgroup == 1) & junjie_api_xx != .
count if shanghai_home_exp_sample == 1

gen byte nantong_prod_sample = 0
replace nantong_prod_sample = 1 if (first_initial == "N") & nantong_shuang_api_pm10 != .
replace nantong_prod_sample = 0 if minutes_on_phone == . | minutes_on_phone == 0 | nantong_shuang_api_pm10 == . | LogInLengthsecond == 0 | LogInLengthsecond == . | NumCall == . | nantong_temp == . 
count if nantong_prod_sample == 1

** One check here:
count if first_initial == "N"
codebook Date if first_initial == "N"
codebook Date if first_initial == "N" & nantong_prod_sample != .

************************************************************
**   Prepare Fixed Effects
************************************************************

** The following code is easier if we break apart the fixed
** effects as follows...

capture program drop create_fes
program define create_fes

	** We now prepare the fixed effects...
	gen ym = 100 * year(Date) + month(Date)
	sum ym
	qui levelsof ym , local(cells)
	foreach cell of local cells {
		disp "Now on value `cell'"
		gen byte ym_`cell'  = (ym == `cell')
	}
	tab day_of_week
	forvalues i = 0/6 {
		gen byte dow_`i' = (day_of_week == `i')
	}
end

create_fes

capture program drop take_out_worker_fe
program define take_out_worker_fe
	** Now we have partial out the employee fixed effects
	foreach var in ///
	hotel ltemp ltemp2 ///
	ym_201001 ym_201002 ym_201003 ym_201004 ym_201005 ym_201006 ym_201007 ym_201008 ym_201009 ym_201010 ym_201011 ym_201012 ym_201101 ym_201102 ym_201103 ym_201104 ym_201105 ym_201106 ym_201107 ym_201108 ym_201109 ym_201110 ym_201111 ym_201112 ym_201201 ym_201202 ym_201203 ym_201204 ym_201205 ym_201206 ym_201207 ym_201208 ym_201209 ym_201210 ym_201211 ym_201212 ///
	dow_0 dow_1 dow_2 dow_3 dow_4 dow_5 dow_6 ///
	`1' {
		disp "Now on variable `var'"
		qui areg `var' , a(EmployeeID) 
		predict r_`var' , resid
	}
end

************************************************************
**   Create lists of key variables
************************************************************

local pollutants = "junjie_api_xx pm10 pm25 pm25_imp"
local full_window_pollutants = "junjie_api_xx pm10 pm25_imp"
d `pollutants' , f
sum `pollutants'

local key_outcomes = "log_NumCall log_minutes_on_phone log_calls_per_phone_minutes"
d `key_outcomes' , f
sum `key_outcomes'

local key_outcomes_levels = "NumCall minutes_on_phone calls_per_phone_minutes"
d `key_outcomes_levels' , f
sum `key_outcomes_levels'

estimates clear

************************************************************
************************************************************
**   Analysis Section Below
************************************************************
************************************************************

************************************************************
**   Labor Supply, Nantong, extensive-margin piece, FP
************************************************************

** Step 1: create a skeleton for working days
preserve
	keep if nantong_prod_sample == 1

	isid EmployeeID Date

	gen byte worked = (NumCall > 0 & NumCall < .)
	collapse (sum) worked NumCall, by(Date) fast
	sum worked
	centile worked , c(1 5 10 20 30 50 70 80 90 95)
	*list Date worked NumCall

	gen dow = dow(Date)
	table dow , c(mean worked)
	drop dow

	gen byte working_day = (worked > 0 & worked != .) /* Tom changed it to this from: (worked > 1200) */
	keep if working_day == 1
	keep Date working_day 

	tempfile working_days
	save `working_days'
restore

** Step 2: merge in the skeleton, 
preserve
	keep if nantong_prod_sample == 1

	** The following is memory-intensive, so we drop unnecessary variables here
	keep Date day_of_week EmployeeID NumCall nantong_shuang_api_pm10 hotel nantong_temp nantong_temp2 

	merge m:1 Date using `working_days' 
	drop _merge

	** For this exercise, we only want days when people were working
	keep if working_day == 1
	drop working_day

	count
	fillin EmployeeID Date
	count
	tab _fillin

	gen byte worked_that_day = (NumCall > 0 & NumCall < .)

	** Now the tricky part: we have to spread the data to the new observations
	sum nantong_shuang_api_pm10 hotel nantong_temp nantong_temp2 if _fillin == 0
	sum nantong_shuang_api_pm10 hotel nantong_temp nantong_temp2 if _fillin == 1

	** First we spread over the variables by date
	foreach var in nantong_shuang_api_pm10 nantong_temp nantong_temp2 ///
	day_of_week ///
	{
		egen max_`var' = max(`var') , by(Date)
		replace `var' = max_`var' if `var' == .
		drop max_`var'
	}

	** Then we spread over the variables by employee
	foreach var in hotel {
		egen max_`var' = max(`var') , by(EmployeeID)
		replace `var' = max_`var' if `var' == .
		drop max_`var'
	}

	** Finally, we only focus on work days or absenses that happened
	** "near" work days
	gen byte main_sample = 0
	replace main_sample = 1 if worked_that_day == 1
	sort EmployeeID Date
	*by EmployeeID: replace main_sample = 1 if ///
	*worked_that_day[_n - 1] == 1 | ///
	*worked_that_day[_n - 2] == 1 
	*by EmployeeID: replace main_sample = 1 if ///
	*worked_that_day[_n + 1] == 1 | ///
	*worked_that_day[_n + 2] == 1 
	** Tom switched to this approach:
	tab main_sample , miss
	by EmployeeID: replace main_sample = 1 if ///
	worked_that_day[_n + 1] == 1 | ///
	worked_that_day[_n + 2] == 1 | ///
	worked_that_day[_n + 3] == 1 | ///
	worked_that_day[_n + 4] == 1 | ///
	worked_that_day[_n + 5] == 1 
	tab main_sample , miss

	** Also limit to workers who have worked many days
	tab main_sample , miss
	sort EmployeeID Date
	by EmployeeID: gen byte first_ob = (_n == 1)
	egen days_worked = sum(worked_that_day) , by (EmployeeID)
	centile days_worked if first_ob == 1, c(10 20 30 40 50 60 70 80 90)
	drop first_ob
	tab main_sample , miss

	gen fooDate = Date if main_sample==1
	egen min_date = min(fooDate), by(EmployeeID)
	drop if Date < min_date
	gen barDate = Date if main_sample==1
	egen max_date = max(barDate), by(EmployeeID)
	drop if Date > max_date

	tab main_sample

	keep if main_sample == 1

	** We save out these data for the pooled extensive-margin test below
	drop main_sample
	gen byte nantong_piece = 1
	save ../dta/nantong_extmarg_sample.dta , replace
	drop nantong_piece

	create_fes

	sum worked_that_day nantong_shuang_api_pm10 hotel nantong_temp nantong_temp2 ym_* dow_* 

	** RESULTS: linear extensive margin, original SE's, Nantong
	qui eststo: reg worked_that_day nantong_shuang_api_pm10 nantong_temp nantong_temp2 hotel ym_* dow_* , cluster(Date) 
	qui eststo: areg worked_that_day nantong_shuang_api_pm10 nantong_temp nantong_temp2 hotel ym_* dow_* , cluster(Date)  a(EmployeeID)

	tabresults "nantong_shuang_api_pm10 nantong_temp nantong_temp2"

	** Create binned api
	gen byte api_0_5 = (nantong_shuang_api_pm10 > 0 & nantong_shuang_api_pm10 <= 5)
	gen byte api_5_10 = (nantong_shuang_api_pm10 > 5 & nantong_shuang_api_pm10 <= 10)
	gen byte api_10_15 = (nantong_shuang_api_pm10 > 10 & nantong_shuang_api_pm10 <= 15)
	gen byte api_15_20 = (nantong_shuang_api_pm10 > 15 & nantong_shuang_api_pm10 <= 20)
	gen byte api_20_p = (nantong_shuang_api_pm10 > 20 & nantong_shuang_api_pm10 < .)

	** Now we have partial out the employee fixed effects
	foreach var in ///
	hotel nantong_temp nantong_temp2 ///
	ym_201109 ym_201110 ym_201111 ym_201112 ym_201201 ym_201202 ym_201203 ym_201204 ym_201205 ym_201206 ym_201207 ym_201208 ym_201209 ym_201210 ym_201211 ym_201212 ///
	dow_0 dow_1 dow_2 dow_3 dow_4 dow_5 dow_6 ///
	worked_that_day nantong_shuang_api_pm10 api_0_5 api_5_10 api_10_15 api_15_20 api_20_p ///
	{
		disp "Now on variable `var'"
		qui areg `var' , a(EmployeeID) 
		predict r_`var' , resid
	}

	** TABLE 7B: linear extensive margin, multi-way clustering, Nantong
	qui eststo: cgmreg worked_that_day nantong_shuang_api_pm10 nantong_temp nantong_temp2 hotel ym_* dow_* , cluster(Date EmployeeID) 

	tabresults "*nantong_shuang_api_pm10* *temp* "

	** APPENDIX TABLE A2: linear extensive margin, multi-way clustering, Nantong
	qui eststo: cgmreg r_worked_that_day r_nantong_shuang_api_pm10 r_nantong_temp r_nantong_temp2 r_hotel r_ym_* r_dow_* , cluster(Date EmployeeID)  
	tabresults "*nantong_shuang_api_pm10* *temp* "

	** APPENDIX TABLE A2 JUST for R2: linear extensive margin, Nantong
	qui eststo: areg worked_that_day nantong_shuang_api_pm10 nantong_temp nantong_temp2 hotel ym_* dow_* , a(EmployeeID)
	tabresults "*nantong_shuang_api_pm10* *temp* "

	bysort Date: gen byte first_ob = (_n == 1)
	tab1 api_* if first_ob == 1
	foreach cat in api_0_5 api_5_10 api_10_15 api_15_20 api_20_p  {
		disp "For bin `cat': "
		sum NumCall if `cat' == 1
	}
	drop first_ob

	drop if nantong_shuang_api_pm10 == .

	** RESULTS: non-linear extensive margin, original SE's, Nantong
	qui eststo: reg worked_that_day api_5_10 api_10_15 api_15_20 hotel nantong_temp nantong_temp2 ym_* dow_* , cluster(Date) 
	qui eststo: areg worked_that_day api_5_10 api_10_15 api_15_20 nantong_temp nantong_temp2 ym_* dow_* , cluster(Date) a(EmployeeID)
	tabresults "*api_* *temp* "

	** RESULTS: non-linear extensive margin, multi-way clustering, Nantong
	qui eststo: cgmreg worked_that_day api_5_10 api_10_15 api_15_20 hotel nantong_temp nantong_temp2 ym_* dow_* , cluster(Date EmployeeID) 
	qui eststo: cgmreg r_worked_that_day r_api_5_10 r_api_10_15 r_api_15_20 r_nantong_temp r_nantong_temp2 r_ym_* r_dow_* , cluster(Date EmployeeID) 
	tabresults "*api_* *temp* "
restore

************************************************************
**   Labor Supply, Shanghai, extensive-margin piece, FP
************************************************************

** Step 1: create a skeleton for working days
preserve
	** Only those in Shanghai and in BLRY data: 
	keep if shanghai_prod_sample == 1

	isid EmployeeID Date

	gen byte worked = (NumCall > 0 & NumCall < .)
	collapse (sum) worked NumCall, by(Date) fast
	sum worked
	centile worked , c(1 5 10 20 30 50 70 80 90 95)
	*list Date worked NumCall

	gen dow = dow(Date)
	table dow , c(mean worked)
	drop dow

	gen byte working_day = (worked > 0 & worked != .) /* Tom changed it to this from: (worked > 1200) */
	keep if working_day == 1
	keep Date working_day 

	tempfile working_days
	save `working_days'
restore

** Step 2: merge in the skeleton, 
preserve
	** Only those in Shanghai and in BLRY data: 
	keep if shanghai_prod_sample == 1

	** The following is memory-intensive, so we drop unnecessary variables here
	keep Date EmployeeID NumCall yearmonth day_of_week junjie_api_xx hotel tmax* precipitation shanghai_prod_sample

	merge m:1 Date using `working_days' 
	drop _merge

	** For this exercise, we only want days when people were working
	keep if working_day == 1
	drop working_day

	count
	fillin EmployeeID Date
	count
	tab _fillin

	gen byte worked_that_day = (NumCall > 0 & NumCall < .)

	** Now the tricky part: we have to spread the data to the new observations
	sum junjie_api_xx hotel tmax_fah tmax_fah2 precipitation if _fillin == 0
	sum junjie_api_xx hotel tmax_fah tmax_fah2 precipitation if _fillin == 1

	** First we spread over the variables by date
	foreach var in junjie_api_xx tmax_fah tmax_fah2 precipitation yearmonth day_of_week {
		egen max_`var' = max(`var') , by(Date)
		replace `var' = max_`var' if `var' == .
		drop max_`var'
	}

	** Then we spread over the variables by employee
	foreach var in hotel {
		egen max_`var' = max(`var') , by(EmployeeID)
		replace `var' = max_`var' if `var' == .
		drop max_`var'
	}

	** Finally, we only focus on work days or absenses that happened
	** "near" work days
	gen byte main_sample = 0
	replace main_sample = 1 if worked_that_day == 1
	sort EmployeeID Date
	tab main_sample , miss
	***by EmployeeID: replace main_sample = 1 if ///
	***worked_that_day[_n - 1] == 1 | ///
	***worked_that_day[_n - 2] == 1 
	***by EmployeeID: replace main_sample = 1 if ///
	***worked_that_day[_n + 1] == 1 | ///
	***worked_that_day[_n + 2] == 1 
	** Tom switched to this approach:
	tab main_sample , miss
	by EmployeeID: replace main_sample = 1 if ///
	worked_that_day[_n + 1] == 1 | ///
	worked_that_day[_n + 2] == 1 | ///
	worked_that_day[_n + 3] == 1 | ///
	worked_that_day[_n + 4] == 1 | ///
	worked_that_day[_n + 5] == 1 
	tab main_sample , miss

	keep if main_sample == 1

	** We save out these data for the pooled extensive-margin test below
	drop main_sample
	gen byte shanghai_piece = 1
	save ../dta/shanghai_extmarg_sample.dta , replace
	drop shanghai_piece

	create_fes

	** RESULTS: linear extensive margin, original SE's, shanghai
	qui eststo: reg worked_that_day junjie_api_xx hotel tmax_fah tmax_fah2 ym_* dow_* , cluster(Date) 
	qui eststo: areg worked_that_day junjie_api_xx hotel tmax_fah tmax_fah2 ym_* dow_* , cluster(Date)  a(EmployeeID)

	tabresults "junjie_api_xx tmax_fah tmax_fah2 "

	** Create binned api
	gen byte api_0_5 = (junjie_api_xx > 0 & junjie_api_xx <= 5)
	gen byte api_5_10 = (junjie_api_xx > 5 & junjie_api_xx <= 10)
	gen byte api_10_15 = (junjie_api_xx > 10 & junjie_api_xx <= 15)
	gen byte api_15_20 = (junjie_api_xx > 15 & junjie_api_xx <= 20)
	gen byte api_20_p = (junjie_api_xx > 20 & junjie_api_xx < .)

	** Now we have partial out the employee fixed effects
	d, f
	foreach var in ///
	hotel junjie_api_xx tmax_fah tmax_fah2 precipitation ///
	ym_201001 ym_201002 ym_201003 ym_201004 ym_201005 ym_201006 ym_201007 ym_201008 ym_201009 ym_201010 ym_201011 ym_201012 ym_201101 ym_201102 ym_201103 ym_201104 ym_201105 ym_201106 ym_201107 ym_201108 ///
	dow_0 dow_1 dow_2 dow_3 dow_4 dow_5 dow_6 ///
	worked_that_day api_0_5 api_5_10 api_10_15 api_15_20 api_20_p ///
	{
		disp "Now on variable `var'"
		qui areg `var' , a(EmployeeID) 
		predict r_`var' , resid
	}

	** TABLE 7A: linear extensive margin, multi-way clustering, shanghai
	qui eststo: cgmreg worked_that_day junjie_api_xx hotel tmax_fah tmax_fah2 precipitation ym_* dow_* , cluster(Date EmployeeID) 
	tabresults "*api* *tmax* "

	** APPENDIX TABLE A2: linear extensive margin, multi-way clustering, shanghai
	qui eststo: cgmreg r_worked_that_day r_junjie_api_xx r_hotel r_tmax_fah r_tmax_fah2 r_precipitation r_ym_* r_dow_* , cluster(Date EmployeeID)  
	tabresults "*api* *tmax* "

	** APPENDIX TABLE A2 just for R2: linear extensive margin, shanghai
	qui eststo: areg worked_that_day junjie_api_xx hotel tmax_fah tmax_fah2 precipitation ym_* dow_* , a(EmployeeID)  
	tabresults "*api* *tmax* "

	** RESULTS: non-linear extensive margin, original SE's, shanghai
	qui eststo: reg worked_that_day api_5_10 api_10_15 api_15_20 api_20_p hotel tmax_fah tmax_fah2 precipitation ym_* dow_* , cluster(Date) 
	qui eststo: areg worked_that_day api_5_10 api_10_15 api_15_20 api_20_p tmax_fah tmax_fah2 precipitation ym_* dow_* , cluster(Date) a(EmployeeID)

	tabresults "api_* *tmax* "

	** RESULTS: non-linear extensive margin, multi-way clustering, shanghai
	qui eststo: cgmreg worked_that_day api_5_10 api_10_15 api_15_20 api_20_p hotel tmax_fah tmax_fah2 precipitation ym_* dow_* , cluster(Date EmployeeID) 
	qui eststo: cgmreg r_worked_that_day r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_hotel r_tmax_fah r_tmax_fah2 r_precipitation r_ym_* r_dow_* , cluster(Date EmployeeID)  

	tabresults "*api* *tmax* "
restore


************************************************************
**   Study time-stamp hours, FP
************************************************************

preserve

	d hour* minute*
	sum hour* minute*

	sum timeclock_hours* 
	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)
	sum timeclock_hours* 

	centile timeclock_hours , c(10 20 30 40 50 60 70 80 90 95 99)
	drop if timeclock_hours == .
	drop if timeclock_hours > 12

	gen log_timeclock_hours = log(timeclock_hours)


	** Restrict based on time-stamp hours and logged-in hours:
	** You have to be in the office for at least 15 minutes
	** longer than your logged-in time.
	keep if timeclock_hours - hours_logged_in > 0.25

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	gen byte api_0_5 = (api > 0 & api <= 5 )
	gen byte api_5_10 = (api > 5 & api <= 10 )
	gen byte api_10_15 = (api > 10 & api <= 15 )
	gen byte api_15_20 = (api > 15 & api <= 20 )
	gen byte api_20_p = (api > 20 & api < . )

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1
	gen ltemp2 = ltemp*ltemp / 1000

	** RESULTS: linear time-stamp hours, original SE's, merged sample
	qui eststo: reg timeclock_hours api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg timeclock_hours api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)
	qui eststo: reg log_timeclock_hours api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_timeclock_hours api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "api ltemp ltemp2"

	** RESULTS: non-linear time-stamp hours, original SE's, merged sample
	qui eststo: reg timeclock_hours api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg timeclock_hours api_5_10 api_10_15 api_15_20 api_20_p  ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)
	qui eststo: reg log_timeclock_hours api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_timeclock_hours api_5_10 api_10_15 api_15_20 api_20_p ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "api_* ltemp ltemp2"

	take_out_worker_fe "timeclock_hours log_timeclock_hours api_5_10 api_10_15 api_15_20 api_20_p api"

	** TABLE 2A just for R2: linear time-stamp hours, multi-way clustering, merged sample
	qui eststo: cgmreg timeclock_hours api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: areg timeclock_hours api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)
	tabresults "*api* *ltemp*"

	** TABLE 2A: linear time-stamp hours, multi-way clustering, merged sample
	qui eststo: cgmreg timeclock_hours api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_timeclock_hours r_api r_ltemp r_ltemp2 r_ym_* r_dow_*, cluster(Date EmployeeID) 
	tabresults "*api* *ltemp*"

	** TABLE 7B: linear time-stamp hours, multi-way clustering, nantong
	qui eststo: cgmreg timeclock_hours api hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 
	tabresults "*api* *ltemp*"

	** TABLE 7A: linear time-stamp hours, multi-way clustering, shanghai
	qui eststo: cgmreg timeclock_hours api hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	tabresults "*api* *ltemp*"

	** APPENDIX TABLE A2: linear time-stamp hours, multi-way clustering, nantong
	qui eststo: cgmreg r_timeclock_hours r_api r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 
	tabresults "*api* *ltemp*"

	** APPENDIX TABLE A2 just for R2: linear time-stamp hours, nantong
	qui eststo: areg timeclock_hours api hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, a(EmployeeID) 
	tabresults "*api* *ltemp*"

	** APPENDIX TABLE A2: linear time-stamp hours, multi-way clustering, shanghai
	qui eststo: cgmreg r_timeclock_hours r_api r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	tabresults "*api* *ltemp*"

	** APPENDIX TABLE A2 just for R2: linear time-stamp hours, shanghai
	qui eststo: areg timeclock_hours api hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, a(EmployeeID) 
	tabresults "*api* *ltemp*"

	** TABLE 2B just for R2: non-linear time-stamp hours, multi-way clustering, merged sample
	qui eststo: reg log_timeclock_hours api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* , cluster(Date) 
	qui eststo: areg log_timeclock_hours api_5_10 api_10_15 api_15_20 api_20_p ltemp ltemp2 ym_* dow_* , a(EmployeeID) cluster(Date) 
	tabresults "*api* *ltemp*"

	** TABLE 2B: non-linear time-stamp hours, multi-way clustering, merged sample
	qui eststo: cgmreg log_timeclock_hours api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* , cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_timeclock_hours r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_ltemp r_ltemp2 r_ym_* r_dow_* , cluster(Date EmployeeID) 
	tabresults "*api* *ltemp*"

	** RESULTS: non-linear time-stamp hours, multi-way clustering, nantong
	qui eststo: cgmreg timeclock_hours api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, cluster(Date) 
	qui eststo: cgmreg r_timeclock_hours r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p  r_ltemp r_ltemp2 r_ym_* r_dow_* if nantong_prod_sample == 1, cluster(Date) 
	qui eststo: cgmreg log_timeclock_hours api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, cluster(Date) 
	qui eststo: cgmreg r_log_timeclock_hours r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_ltemp r_ltemp2 r_ym_* r_dow_* if nantong_prod_sample == 1, cluster(Date) 

	tabresults "*api* *ltemp*"

	** RESULTS: non-linear time-stamp hours, multi-way clustering, shanghai
	qui eststo: cgmreg timeclock_hours api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date) 
	qui eststo: cgmreg r_timeclock_hours r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p  r_ltemp r_ltemp2 r_ym_* r_dow_* if shanghai_prod_sample == 1, cluster(Date) 
	qui eststo: cgmreg log_timeclock_hours api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date) 
	qui eststo: cgmreg r_log_timeclock_hours r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_ltemp r_ltemp2 r_ym_* r_dow_* if shanghai_prod_sample == 1, cluster(Date) 

	tabresults "*api* *ltemp*"
restore

************************************************************
**   Time series approach, FP
************************************************************

** Referee 1 asks: Show time-series estimates with Newey-West standard errors. There would 
** be two time-series regressions, one per city. There would also be one observation 
** per city-day, containing mean productivity among workers in the day. 

preserve
	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	gen byte api_0_5 = (api > 0 & api <= 5 )
	gen byte api_5_10 = (api > 5 & api <= 10 )
	gen byte api_10_15 = (api > 10 & api <= 15 )
	gen byte api_15_20 = (api > 15 & api <= 20 )
	gen byte api_20_p = (api > 20 & api < . )

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1
	gen ltemp2 = ltemp*ltemp / 1000

	gen minutes_logged = LogInLengthsecond / 60

	collapse (mean) NumCall minutes_logged (first) dow_* ym_* ltemp ltemp2 api, by(shanghai_prod_sample Date) fast

	** Paper over "gaps" in the time series...
	sort shanghai_prod_sample Date
	list shanghai_prod_sample Date , sepby(shanghai_prod_sample)
	by shanghai_prod_sample: gen pseudo_date = _n
	sum pseudo_date

	gen log_NumCall = log(NumCall)
	gen log_minutes_logged = log(minutes_logged)

	tsset shanghai_prod_sample pseudo_date

	** TABLE 7A: Time Series Approach, Shanghai
	qui eststo: newey log_NumCall api ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, lag(5)
	qui eststo: newey log_minutes_logged api ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, lag(5)
	tabresults "api ltemp ltemp2"

	** TABLE 7B: Time Series Approach, Nantong
	qui eststo: newey log_NumCall api ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 0, lag(5)
	qui eststo: newey log_minutes_logged api ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 0, lag(5)
	tabresults "api ltemp ltemp2"

restore

************************************************************
**   Home Effect, Shanghai, FP
************************************************************

preserve

	gen byte our_treated = (expgroup == 1)
	gen byte dring_expmt = (Date >= mdy(12, 6, 2010) & Date <= mdy(8, 14, 2011) )

	gen ltemp = tmax_fah
	gen ltemp2 = tmax_fah2 / 1000

	** RESTRICTIONS
	keep if expgroup == 0 | expgroup == 1 
	keep if dring_expmt == 1

	foreach pollutant in `pollutants' {
		gen `pollutant'_X_our_treated = `pollutant' * our_treated
		gen `pollutant'_at_home = `pollutant' * our_treated
		gen `pollutant'_not_at_home = `pollutant' * (1 - our_treated)
	}

	** Create consistent sample
	qui reg log_calls_per_phone_minutes junjie_api_xx hotel tmax_fah tmax_fah2 precipitation ym_* dow_* if our_treated == 0, cluster(Date) 
	gen byte sample_0 = e(sample)
	qui reg log_calls_per_phone_minutes junjie_api_xx hotel tmax_fah tmax_fah2 precipitation ym_* dow_* if our_treated == 1, cluster(Date) 
	gen byte sample_1 = e(sample)

	take_out_worker_fe "log_NumCall log_minutes_logged junjie_api_xx"

	** RESULTS: home effect, multi-way clustering, shanghai
	qui eststo: cgmreg log_NumCall junjie_api_xx hotel ltemp ltemp2 ym_* dow_* if our_treated == 0 & sample_0 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_NumCall junjie_api_xx hotel ltemp ltemp2 ym_* dow_* if our_treated == 1 & sample_1 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_junjie_api_xx r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if our_treated == 0 & sample_0 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_junjie_api_xx r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if our_treated == 1 & sample_1 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_minutes_logged junjie_api_xx hotel ltemp ltemp2 ym_* dow_* if our_treated == 0 & sample_0 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_minutes_logged junjie_api_xx hotel ltemp ltemp2 ym_* dow_* if our_treated == 1 & sample_1 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_junjie_api_xx r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if our_treated == 0 & sample_0 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_junjie_api_xx r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if our_treated == 1 & sample_1 == 1, cluster(Date EmployeeID) 
	tabresults "*junjie_api_xx* *temp* "

	** TABLE 5: home effect, multi-way clustering, shanghai
	qui eststo: cgmreg log_NumCall junjie_api_xx hotel ltemp ltemp2 ym_* dow_* if our_treated == 0 & sample_0 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_NumCall junjie_api_xx hotel ltemp ltemp2 ym_* dow_* if our_treated == 1 & sample_1 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_minutes_logged junjie_api_xx hotel ltemp ltemp2 ym_* dow_* if our_treated == 0 & sample_0 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_minutes_logged junjie_api_xx hotel ltemp ltemp2 ym_* dow_* if our_treated == 1 & sample_1 == 1, cluster(Date EmployeeID) 
	tabresults "*junjie_api_xx* *temp* "

	** APPENDIX TABLE A1: home effect, multi-way clustering, shanghai, all with FE
	qui eststo: cgmreg r_log_NumCall r_junjie_api_xx r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if our_treated == 0 & sample_0 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_junjie_api_xx r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if our_treated == 1 & sample_1 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_junjie_api_xx r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if our_treated == 0 & sample_0 == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_junjie_api_xx r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if our_treated == 1 & sample_1 == 1, cluster(Date EmployeeID) 
	tabresults "*junjie_api_xx* *temp* "

	** APPENDIX TABLE A1 just for R2: home effect, multi-way clustering, shanghai, all with FE
	qui eststo: areg log_NumCall junjie_api_xx hotel ltemp ltemp2 ym_* dow_* if our_treated == 0 & sample_0 == 1, a(EmployeeID)
	qui eststo: areg log_NumCall junjie_api_xx hotel ltemp ltemp2 ym_* dow_* if our_treated == 1 & sample_1 == 1, a(EmployeeID)
	qui eststo: areg log_minutes_logged junjie_api_xx hotel ltemp ltemp2 ym_* dow_* if our_treated == 0 & sample_0 == 1, a(EmployeeID)
	qui eststo: areg log_minutes_logged junjie_api_xx hotel ltemp ltemp2 ym_* dow_* if our_treated == 1 & sample_1 == 1, a(EmployeeID)
	tabresults "*junjie_api_xx* *temp* "
restore

************************************************************
**   Sample Statistics, FP
************************************************************

preserve

	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	gen byte api_0_5 = (api > 0 & api <= 5 )
	gen byte api_5_10 = (api > 5 & api <= 10 )
	gen byte api_10_15 = (api > 10 & api <= 15 )
	gen byte api_15_20 = (api > 15 & api <= 20 )
	gen byte api_20_p = (api > 20 & api < . )

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1

	** WATCH OUT: for this table only, we go back to 10X
	replace api = api * 10

	** First pooled sample statistics for all days
	bysort Date: gen byte first_ob_of_day = (_n == 1)
	sum api if first_ob_of_day == 1
	sum api if first_ob_of_day == 1 & shanghai_prod_sample == 1
	sum api if first_ob_of_day == 1 & nantong_prod_sample == 1
	sum ltemp if first_ob_of_day == 1
	sum ltemp if first_ob_of_day == 1 & shanghai_prod_sample == 1
	sum ltemp if first_ob_of_day == 1 & nantong_prod_sample == 1

	** API by bin...
	sum api if api_0_5 == 1 & first_ob_of_day == 1
	sum api if api_5_10 == 1 & first_ob_of_day == 1
	sum api if api_10_15 == 1 & first_ob_of_day == 1
	sum api if api_15_20 == 1 & first_ob_of_day == 1
	sum api if api_20_p == 1 & first_ob_of_day == 1

	** API by bin... Shanghai
	sum api if api_0_5 == 1 & shanghai_prod_sample == 1 & first_ob_of_day == 1
	sum api if api_5_10 == 1 & shanghai_prod_sample == 1 & first_ob_of_day == 1
	sum api if api_10_15 == 1 & shanghai_prod_sample == 1 & first_ob_of_day == 1
	sum api if api_15_20 == 1 & shanghai_prod_sample == 1 & first_ob_of_day == 1
	sum api if api_20_p == 1 & shanghai_prod_sample == 1 & first_ob_of_day == 1

	** API by bin... Nantong
	sum api if api_0_5 == 1 & nantong_prod_sample == 1 & first_ob_of_day == 1
	sum api if api_5_10 == 1 & nantong_prod_sample == 1 & first_ob_of_day == 1
	sum api if api_10_15 == 1 & nantong_prod_sample == 1 & first_ob_of_day == 1
	sum api if api_15_20 == 1 & nantong_prod_sample == 1 & first_ob_of_day == 1
	sum api if api_20_p == 1 & nantong_prod_sample == 1 & first_ob_of_day == 1

	** Second, summarize worker-days
	codebook EmployeeID
	codebook EmployeeID if shanghai_prod_sample == 1
	codebook EmployeeID if nantong_prod_sample == 1

	count
	count if shanghai_prod_sample == 1
	count if nantong_prod_sample == 1

	sum NumCall
	sum NumCall if shanghai_prod_sample == 1
	sum NumCall if nantong_prod_sample == 1

	gen minutes_logged = LogInLengthsecond / 60
	sum minutes_logged 
	sum minutes_logged if shanghai_prod_sample == 1
	sum minutes_logged if nantong_prod_sample == 1

	sum minutes_on_phone
	sum minutes_on_phone if shanghai_prod_sample == 1
	sum minutes_on_phone if nantong_prod_sample == 1

	gen ph_min_per_call = minutes_on_phone / NumCall 
	sum ph_min_per_call
	sum ph_min_per_call if shanghai_prod_sample == 1
	sum ph_min_per_call if nantong_prod_sample == 1

restore

************************************************************
**   Productivity regression, FP
************************************************************

preserve
	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	gen byte api_0_5 = (api > 0 & api <= 5 )
	gen byte api_5_10 = (api > 5 & api <= 10 )
	gen byte api_10_15 = (api > 10 & api <= 15 )
	gen byte api_15_20 = (api > 15 & api <= 20 )
	gen byte api_20_p = (api > 20 & api < . )

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1
	gen ltemp2 = ltemp*ltemp / 1000

	** Referee Table L1: Linear productivity, multi-way fixed effects, Merged Sample, adding time trends
	gen time_trend = Date
	gen time_trend2 = Date * Date
	gen time_trend3 = Date * Date * Date
	qui eststo: cgmreg log_NumCall api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_NumCall api time_trend hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_NumCall api time_trend time_trend2 hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_NumCall api time_trend time_trend2 time_trend3 hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	tabresults "*api* *temp* time_*"

	** RESULTS: Linear productivity, original SE's, merged sample
	qui eststo: reg log_NumCall api hotel ltemp ltemp2 ym_* dow_* , cluster(Date) 
	qui eststo: areg log_NumCall api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	qui eststo: reg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_minutes_logged api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "api ltemp ltemp2"

	** RESULTS: Non-linear productivity, original SE's, merged sample
	qui eststo: reg log_NumCall api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_NumCall api_5_10 api_10_15 api_15_20 api_20_p  ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	qui eststo: reg log_minutes_logged api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_minutes_logged api_5_10 api_10_15 api_15_20 api_20_p ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "api_* ltemp ltemp2"

	take_out_worker_fe "log_NumCall log_minutes_logged api api_5_10 api_10_15 api_15_20 api_20_p "

	** TABLE 3A just for R2: Linear productivity, multi-way fixed effects, Merged Sample
	qui eststo: reg log_NumCall api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_NumCall api ltemp ltemp2 ym_* dow_* , cluster(Date) a(EmployeeID)

	qui eststo: reg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_minutes_logged api ltemp ltemp2 ym_* dow_* , cluster(Date) a(EmployeeID)
	tabresults "*api* *temp* "

	** Referee TABLE L2: Linear productivity, multi-way fixed effects, Merged Sample, cluster on week rather than date
	local sunday1 = mdy(1,6,1991)
	gen myweek = floor((Date - `sunday1')/7)
	qui eststo: reg log_NumCall api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: cgmreg log_NumCall api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_NumCall api hotel ltemp ltemp2 ym_* dow_*, cluster(myweek EmployeeID) 
	qui eststo: reg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: cgmreg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_*, cluster(myweek EmployeeID) 
	tabresults "*api* *temp* "

	** TABLE 3A: Linear productivity, multi-way fixed effects, Merged Sample
	qui eststo: cgmreg log_NumCall api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_api r_ltemp r_ltemp2 r_ym_* r_dow_* , cluster(Date EmployeeID) 

	qui eststo: cgmreg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_api r_ltemp r_ltemp2 r_ym_* r_dow_* , cluster(Date EmployeeID) 
	tabresults "*api* *temp* "

	** TABLE 7B: Linear productivity, multi-way fixed effects, Nantong
	qui eststo: cgmreg log_NumCall api hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 

	qui eststo: cgmreg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 
	tabresults "*api* *temp* "

	** APPENDIX TABLE A2: Linear productivity, multi-way fixed effects, Nantong
	qui eststo: cgmreg r_log_NumCall r_api r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_api r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 
	tabresults "*api* *temp* "

	** APPENDIX TABLE A2 just for R2: Linear productivity, Nantong
	qui eststo: areg log_NumCall api hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, a(EmployeeID) 
	qui eststo: areg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, a(EmployeeID) 
	tabresults "*api* *temp* "

	** TABLE 7A: Linear productivity, multi-way fixed effects, Shanghai
	qui eststo: cgmreg log_NumCall api hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	tabresults "*api* *temp* "

	** APPENDIX TABLE A2: Linear productivity, multi-way fixed effects, Shanghai
	qui eststo: cgmreg r_log_NumCall r_api r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_api r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	tabresults "*api* *temp* "

	** APPENDIX TABLE A2 just for R2: Linear productivity, Shanghai
	qui eststo: areg log_NumCall api hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, a(EmployeeID) 
	qui eststo: areg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, a(EmployeeID) 
	tabresults "*api* *temp* "

	** TABLE 8A: Linear productivity, multi-way fixed effects, Shanghai without and with  precipitation
	qui eststo: cgmreg log_NumCall api hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_NumCall api hotel precipitation ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	tabresults "*api* *temp* precipitation "

	** TABLE 8B: Linear productivity, multi-way fixed effects, Shanghai without and with  precipitation
	qui eststo: cgmreg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_minutes_logged api hotel precipitation ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	tabresults "*api* *temp* precipitation "

	** TABLE 3B just for R2: Non-Linear productivity, multi-way fixed effects, Merged Sample
	qui eststo: cgmreg log_NumCall api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: areg log_NumCall api_5_10 api_10_15 api_15_20 api_20_p ltemp ltemp2 ym_* dow_* , cluster(Date) a(EmployeeID)
	qui eststo: cgmreg log_minutes_logged api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: areg log_minutes_logged api_5_10 api_10_15 api_15_20 api_20_p ltemp ltemp2 ym_* dow_* , cluster(Date) a(EmployeeID)
	tabresults "*api* *temp* "

	** TABLE 3B: Non-Linear productivity, multi-way fixed effects, Merged Sample
	qui eststo: cgmreg log_NumCall api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_ltemp r_ltemp2 r_ym_* r_dow_* , cluster(Date EmployeeID) 
	qui eststo: cgmreg log_minutes_logged api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_ltemp r_ltemp2 r_ym_* r_dow_* , cluster(Date EmployeeID) 
	tabresults "*api* *temp* "

	** RESULTS: Non-Linear productivity, multi-way fixed effects, Nantong
	qui eststo: cgmreg log_NumCall api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_ltemp r_ltemp2 r_ym_* r_dow_*  if nantong_prod_sample == 1, cluster(Date EmployeeID) 

	qui eststo: cgmreg log_minutes_logged api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_ltemp r_ltemp2 r_ym_* r_dow_*  if nantong_prod_sample == 1, cluster(Date EmployeeID) 
	tabresults "*api* *temp* "

	** RESULTS: Non-Linear productivity, multi-way fixed effects, Shanghai 
	qui eststo: cgmreg log_NumCall api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_ltemp r_ltemp2 r_ym_* r_dow_*  if shanghai_prod_sample == 1, cluster(Date EmployeeID) 

	qui eststo: cgmreg log_minutes_logged api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_ltemp r_ltemp2 r_ym_* r_dow_*  if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	tabresults "*api* *temp* "
restore

************************************************************
**   Extensive margin, combined sample, FP
************************************************************

preserve

	clear
	append using ../dta/nantong_extmarg_sample.dta
	append using ../dta/shanghai_extmarg_sample.dta

	tab nantong_piece , miss
	replace nantong_piece = 0 if nantong_piece == .

	** Describe overlap of dates
	egen mean_nantong = mean(nantong_piece) , by(Date)
	tab mean_nantong
	drop mean_nantong

	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if nantong_piece==1

	gen byte api_0_5 = (api > 0 & api <= 5 )
	gen byte api_5_10 = (api > 5 & api <= 10 )
	gen byte api_10_15 = (api > 10 & api <= 15 )
	gen byte api_15_20 = (api > 15 & api <= 20 )
	gen byte api_20_p = (api > 20 & api < . )

	sum api*
	sum api* if nantong_piece == 1
	sum api* if shanghai_piece == 1

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_piece ==1
	gen ltemp2 = ltemp*ltemp / 1000

	create_fes

	** RESULTS: linear extensive margin, original SE's, merged sample
	qui eststo: reg worked_that_day api hotel ltemp ltemp2 ym_* dow_* , cluster(Date) 
	qui eststo: areg worked_that_day api hotel ltemp ltemp2 ym_* dow_* , cluster(Date)  a(EmployeeID)

	tabresults "api ltemp ltemp2 "

	** Now we have partial out the employee fixed effects
	d, f
	foreach var in ///
	hotel api ltemp ltemp2 ///
	ym_201002 ym_201003 ym_201004 ym_201005 ym_201006 ym_201007 ym_201008 ym_201009 ym_201010 ym_201011 ym_201012 ym_201101 ym_201102 ym_201103 ym_201104 ym_201105 ym_201106 ym_201107 ym_201108 ym_201109 ym_201110 ym_201111 ym_201112 ym_201201 ym_201202 ym_201203 ym_201204 ym_201205 ym_201206 ym_201207 ym_201208 ym_201209 ym_201210 ym_201211 ym_201212 ///
	dow_0 dow_1 dow_2 dow_3 dow_4 dow_5 dow_6 ///
	worked_that_day api_0_5 api_5_10 api_10_15 api_15_20 api_20_p ///
	{
		disp "Now on variable `var'"
		qui areg `var' , a(EmployeeID) 
		predict r_`var' , resid
	}

	** TABLE 2A just for R2: linear extensive margin, multi-way clustering, merged sample
	qui eststo: cgmreg worked_that_day api hotel ltemp ltemp2 ym_* dow_* , cluster(Date EmployeeID) 
	qui eststo: areg worked_that_day api hotel ltemp ltemp2 ym_* dow_* , cluster(Date)  a(EmployeeID)

	tabresults "*api* *ltemp* "

	** TABLE 2A: linear extensive margin, multi-way clustering, merged sample
	qui eststo: cgmreg worked_that_day api hotel ltemp ltemp2 ym_* dow_* , cluster(Date EmployeeID) 
	qui eststo: cgmreg r_worked_that_day r_api r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* , cluster(Date EmployeeID)  

	tabresults "*api* *ltemp* "

	******* ** Referee Table: linear extensive margin, multi-way clustering, merged sample, with same obs as time-clock data
	******* qui eststo: cgmreg worked_that_day api hotel ltemp ltemp2 ym_* dow_* if timeclock_hours != . , cluster(Date EmployeeID) 
	******* qui eststo: cgmreg r_worked_that_day r_api r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* if timeclock_hours != . , cluster(Date EmployeeID)  

	******* tabresults "*api* *ltemp* "

	** RESULTS: non-linear extensive margin, original SE's, merged sample
	qui eststo: reg worked_that_day api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* , cluster(Date) 
	qui eststo: areg worked_that_day api_5_10 api_10_15 api_15_20 api_20_p ltemp ltemp2 ym_* dow_* , cluster(Date) a(EmployeeID)

	tabresults "*api* *ltemp* "

	** TABLE 2B just for R2: non-linear extensive margin, multi-way clustering, merged sample
	qui eststo: cgmreg worked_that_day api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* , cluster(Date EmployeeID) 
	qui eststo: areg worked_that_day api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* , cluster(Date)  a(EmployeeID)

	tabresults "*api* *ltemp* "

	** TABLE 2B: non-linear extensive margin, multi-way clustering, merged sample
	qui eststo: cgmreg worked_that_day api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_* , cluster(Date EmployeeID) 
	qui eststo: cgmreg r_worked_that_day r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_hotel r_ltemp r_ltemp2 r_ym_* r_dow_* , cluster(Date EmployeeID)  

	tabresults "*api* *ltemp* "
restore

************************************************************
**   Graph some sort of time series
************************************************************

preserve
	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	collapse (first) api (mean) NumCall , by(shanghai_prod_sample nantong_prod_sample Date) fast

	replace api = 10 * api

	local y2010 = mdy(1, 1, 2010)
	local y2011 = mdy(1, 1, 2011)
	local y2012 = mdy(1, 1, 2012)
	local y2013 = mdy(1, 1, 2013)

	** Just a time series of the two pollution numbers:
	local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
	graph set window fontface "Garamond" 
	twoway ///
	(connected api Date if shanghai_prod_sample == 1, sort lpattern(solid) lcolor(gray) mcolor(red) msymbol(d)) ///
	(connected api Date if nantong_prod_sample == 1 , sort lpattern(solid) lcolor(gray) mcolor(blue) msymbol(o)) ///
	, ///
	ylabel(, nogrid angle(horizontal)  ) ///
	scheme(s2mono)  ///
	aspectratio(`inv_golden_ratio') ///
	graphregion(fcolor(white))  ///
	xlabel(`y2010' "2010" `y2011' "2011" `y2012' "2012" `y2013' "2013") ///
	legend(off) /// legend(region(style(none)))
	yscale( nofextend ) xscale(nofextend) ///
	xtitle(" ") ///
	ytitle("API")
	graph save ../gph/pollution-time-series.gph , replace

	** Just a time series of the two productivity numbers:
	local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
	graph set window fontface "Garamond" 
	twoway ///
	(connected NumCall Date if shanghai_prod_sample == 1, sort lpattern(solid) lcolor(gray) mcolor(red) msymbol(d)) ///
	(connected NumCall Date if nantong_prod_sample == 1 , sort lpattern(solid) lcolor(gray) mcolor(blue) msymbol(o)) ///
	, ///
	ylabel(, nogrid angle(horizontal)  ) ///
	scheme(s2mono)  ///
	aspectratio(`inv_golden_ratio') ///
	graphregion(fcolor(white))  ///
	xlabel(`y2010' "2010" `y2011' "2011" `y2012' "2012" `y2013' "2013") ///
	legend(off) /// legend(region(style(none)))
	yscale( nofextend ) xscale(nofextend) ///
	ytitle("Average calls per day") ///
	xtitle(" ") 
	graph save ../gph/productivity-time-series.gph , replace

	** Now I try both on the same axes, Shanghai
	local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
	graph set window fontface "Garamond" 
	twoway ///
	(connected api Date if shanghai_prod_sample == 1, sort lpattern(dash) lcolor(gray) mcolor(red) msymbol(d) yaxis(1) ) ///
	(connected NumCall Date if shanghai_prod_sample == 1 , sort lpattern(solid) lcolor(gray) mcolor(blue) msymbol(o) yaxis(2) ) ///
	, ///
	ylabel(, nogrid angle(horizontal) axis(1)  ) ///
	ylabel(, nogrid angle(horizontal) axis(2)  ) ///
	scheme(s2mono)  ///
	aspectratio(`inv_golden_ratio') ///
	graphregion(fcolor(white))  ///
	legend(off) /// legend(region(style(none)))
	yscale( nofextend ) xscale(nofextend) ///
	ytitle("API" , axis(1) ) ///
	ytitle("Average calls per day" , axis(2) ) ///
	xtitle("Date") 
	graph save ../gph/raw-graph-shanghai.gph , replace

	** Now I try both on the same axes, Nantong
	local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
	graph set window fontface "Garamond" 
	twoway ///
	(connected api Date if nantong_prod_sample == 1, sort lpattern(dash) lcolor(gray) mcolor(red) msymbol(d) yaxis(1) ) ///
	(connected NumCall Date if nantong_prod_sample == 1 , sort lpattern(solid) lcolor(gray) mcolor(blue) msymbol(o) yaxis(2) ) ///
	, ///
	ylabel(, nogrid angle(horizontal) axis(1)  ) ///
	ylabel(, nogrid angle(horizontal) axis(2)  ) ///
	scheme(s2mono)  ///
	aspectratio(`inv_golden_ratio') ///
	graphregion(fcolor(white))  ///
	legend(off) /// legend(region(style(none)))
	yscale( nofextend ) xscale(nofextend) ///
	ytitle("API" , axis(1) ) ///
	ytitle("Average calls per day" , axis(2) ) ///
	xtitle("Date") 
	graph save ../gph/raw-graph-nantong.gph , replace

restore

************************************************************
**   Letter table on clustering and year-month FE
************************************************************

preserve
	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	gen byte api_0_5 = (api > 0 & api <= 5 )
	gen byte api_5_10 = (api > 5 & api <= 10 )
	gen byte api_10_15 = (api > 10 & api <= 15 )
	gen byte api_15_20 = (api > 15 & api <= 20 )
	gen byte api_20_p = (api > 20 & api < . )

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1
	gen ltemp2 = ltemp*ltemp / 1000

	take_out_worker_fe "log_NumCall log_minutes_logged api api_5_10 api_10_15 api_15_20 api_20_p "

	** Our main results: Linear productivity, multi-way fixed effects, Merged Sample
	qui eststo: cgmreg log_NumCall api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_api r_ltemp r_ltemp2 r_ym_* r_dow_* , cluster(Date EmployeeID) 

	qui eststo: cgmreg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_api r_ltemp r_ltemp2 r_ym_* r_dow_* , cluster(Date EmployeeID) 
	tabresults "*api* *temp* "

	** Clustering on week: Linear productivity, multi-way fixed effects, Merged Sample
	** (I create a week indicator that begins each week on Sunday, rather than stata's lousy week(.) function)
	local sunday1 = mdy(1,6,1991)
	gen week = floor((Date - `sunday1')/7)
	qui eststo: cgmreg log_NumCall api hotel ltemp ltemp2 ym_* dow_*, cluster(week EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_api r_ltemp r_ltemp2 r_ym_* r_dow_* , cluster(week EmployeeID) 

	qui eststo: cgmreg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_*, cluster(week EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_api r_ltemp r_ltemp2 r_ym_* r_dow_* , cluster(week EmployeeID) 
	tabresults "*api* *temp* "

	** Year and month FE: Linear productivity, multi-way fixed effects, Merged Sample
	** (I create a week indicator that begins each week on Sunday, rather than stata's lousy week(.) function)
	gen month = month(Date)
	gen year = year(Date)
	sum month year day_of_week
	xi i.year i.month i.day_of_week
	qui eststo: cgmreg log_NumCall api hotel ltemp ltemp2 _I*, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_minutes_logged api hotel ltemp ltemp2 _I*, cluster(Date EmployeeID) 
	tabresults "*api* *temp* "

	** Year and month FE: Linear productivity, original SE's, merged sample, original temperature controls
	qui eststo: reg log_NumCall api hotel ltemp ltemp2 _I* , cluster(Date) 
	qui eststo: areg log_NumCall api ltemp ltemp2 _I* , cluster(Date) a(EmployeeID)
	qui eststo: reg log_minutes_logged api hotel ltemp ltemp2 _I* , cluster(Date) 
	qui eststo: areg log_minutes_logged api ltemp ltemp2 _I* , cluster(Date) a(EmployeeID)

	tabresults "api ltemp ltemp2 "

restore

************************************************************
**   Experiment with temperature controls, FP
************************************************************

** Describe variation in temperature...
preserve

	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)
	bysort shanghai_prod_sample Date: keep if _n == 1

	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1
	gen ltemp2 = ltemp*ltemp / 1000

	centile ltemp if shanghai_prod_sample == 1 , c(1 2 3 4 5 10 20 30 40 50 60 70 80 90 95 96 97 98 99)
	centile ltemp if shanghai_prod_sample == 0 , c(1 2 3 4 5 10 20 30 40 50 60 70 80 90 95 96 97 98 99)

	centile ltemp if shanghai_prod_sample == 1 , c(25 50 75)
	local shanghai_p25 = r(c_1)
	local shanghai_p50 = r(c_2)
	local shanghai_p75 = r(c_3)
	centile ltemp if shanghai_prod_sample == 0 , c(25 50 75)
	local nantong_p25 = r(c_1)
	local nantong_p50 = r(c_2)
	local nantong_p75 = r(c_3)

restore

preserve
	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	gen byte api_0_5 = (api > 0 & api <= 5 )
	gen byte api_5_10 = (api > 5 & api <= 10 )
	gen byte api_10_15 = (api > 10 & api <= 15 )
	gen byte api_15_20 = (api > 15 & api <= 20 )
	gen byte api_20_p = (api > 20 & api < . )

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1
	gen ltemp2 = ltemp*ltemp / 1000
	make_weather_bins
	tab1 na_* sh_*

	** Create temperature bins interacted with city indicators...
	gen byte ltemp_1_sh = (shanghai_prod_sample == 1 & ltemp >= -100 & ltemp < `shanghai_p25')
	gen byte ltemp_2_sh = (shanghai_prod_sample == 1 & ltemp >= `shanghai_p25' & ltemp < `shanghai_p50')
	gen byte ltemp_3_sh = (shanghai_prod_sample == 1 & ltemp >= `shanghai_p50' & ltemp < `shanghai_p75')
	gen byte ltemp_4_sh = (shanghai_prod_sample == 1 & ltemp >= `shanghai_p75' & ltemp < .)
	gen byte ltemp_1_na = (nantong_prod_sample == 1 & ltemp >= -100 & ltemp < `nantong_p25')
	gen byte ltemp_2_na = (nantong_prod_sample == 1 & ltemp >= `nantong_p25' & ltemp < `nantong_p50')
	gen byte ltemp_3_na = (nantong_prod_sample == 1 & ltemp >= `nantong_p50' & ltemp < `nantong_p75')
	gen byte ltemp_4_na = (nantong_prod_sample == 1 & ltemp >= `nantong_p75' & ltemp < .)

	** TABLE 8A: Linear productivity, multi-way fixed effects, Merged Sample
	qui eststo: cgmreg log_NumCall api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_NumCall api hotel ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_NumCall api hotel ltemp_m_40 ltemp_40_50 ltemp_50_60 ltemp_60_70 ltemp_70_80 ltemp_80_p ym_* dow_*, cluster(Date EmployeeID) 
	tabresults "*api* *temp* "

	** TABLE 8B: Linear productivity, multi-way fixed effects, Merged Sample
	qui eststo: cgmreg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_minutes_logged api hotel ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_minutes_logged api hotel ltemp_m_40 ltemp_40_50 ltemp_50_60 ltemp_60_70 ltemp_70_80 ltemp_80_p ym_* dow_*, cluster(Date EmployeeID) 
	tabresults "*api* *temp* "

	** RESULTS: Linear productivity, original SE's, merged sample, original temperature controls
	qui eststo: reg log_NumCall api hotel ltemp ltemp2 ym_* dow_* , cluster(Date) 
	qui eststo: areg log_NumCall api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)
	qui eststo: reg log_minutes_logged api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_minutes_logged api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "api ltemp ltemp2 "

	** RESULTS: Linear productivity, original SE's, merged sample, relative temperature bins
	qui eststo: reg log_NumCall api hotel ltemp_1_sh ltemp_2_sh ltemp_3_sh ltemp_4_sh ltemp_1_na ltemp_2_na ltemp_3_na ltemp_4_na ym_* dow_* , cluster(Date) 
	qui eststo: areg log_NumCall api ltemp_1_sh ltemp_2_sh ltemp_3_sh ltemp_4_sh ltemp_1_na ltemp_2_na ltemp_3_na ltemp_4_na ym_* dow_*, cluster(Date) a(EmployeeID)
	qui eststo: reg log_minutes_logged api hotel ltemp_1_sh ltemp_2_sh ltemp_3_sh ltemp_4_sh ltemp_1_na ltemp_2_na ltemp_3_na ltemp_4_na ym_* dow_*, cluster(Date) 
	qui eststo: areg log_minutes_logged api ltemp_1_sh ltemp_2_sh ltemp_3_sh ltemp_4_sh ltemp_1_na ltemp_2_na ltemp_3_na ltemp_4_na ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "api ltemp_1_sh ltemp_2_sh ltemp_3_sh ltemp_4_sh ltemp_1_na ltemp_2_na ltemp_3_na ltemp_4_na "

	** RESULTS: Linear productivity, original SE's, merged sample, absolute temperature bins
	qui eststo: reg log_NumCall api hotel na_ltemp_m_40 na_ltemp_40_50 na_ltemp_50_60 na_ltemp_60_70 na_ltemp_70_80 na_ltemp_80_p sh_ltemp_m_40 sh_ltemp_40_50 sh_ltemp_50_60 sh_ltemp_60_70 sh_ltemp_70_80 sh_ltemp_80_p ym_* dow_* , cluster(Date) 
	qui eststo: areg log_NumCall api na_ltemp_m_40 na_ltemp_40_50 na_ltemp_50_60 na_ltemp_60_70 na_ltemp_70_80 na_ltemp_80_p sh_ltemp_m_40 sh_ltemp_40_50 sh_ltemp_50_60 sh_ltemp_60_70 sh_ltemp_70_80 sh_ltemp_80_p ym_* dow_*, cluster(Date) a(EmployeeID)
	qui eststo: reg log_minutes_logged api hotel na_ltemp_m_40 na_ltemp_40_50 na_ltemp_50_60 na_ltemp_60_70 na_ltemp_70_80 na_ltemp_80_p sh_ltemp_m_40 sh_ltemp_40_50 sh_ltemp_50_60 sh_ltemp_60_70 sh_ltemp_70_80 sh_ltemp_80_p ym_* dow_*, cluster(Date) 
	qui eststo: areg log_minutes_logged api na_ltemp_m_40 na_ltemp_40_50 na_ltemp_50_60 na_ltemp_60_70 na_ltemp_70_80 na_ltemp_80_p sh_ltemp_m_40 sh_ltemp_40_50 sh_ltemp_50_60 sh_ltemp_60_70 sh_ltemp_70_80 sh_ltemp_80_p ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "api na_ltemp_m_40 na_ltemp_40_50 na_ltemp_50_60 na_ltemp_60_70 na_ltemp_70_80 na_ltemp_80_p sh_ltemp_m_40 sh_ltemp_40_50 sh_ltemp_50_60 sh_ltemp_60_70 sh_ltemp_70_80 sh_ltemp_80_p"

	** RESULTS: Linear productivity, original SE's, merged sample, absolute temperature bins overall
	qui eststo: reg log_NumCall api hotel ltemp_m_40 ltemp_40_50 ltemp_50_60 ltemp_60_70 ltemp_70_80 ltemp_80_p ym_* dow_* , cluster(Date) 
	qui eststo: areg log_NumCall api ltemp_m_40 ltemp_40_50 ltemp_50_60 ltemp_60_70 ltemp_70_80 ltemp_80_p ym_* dow_*, cluster(Date) a(EmployeeID)
	qui eststo: reg log_minutes_logged api hotel ltemp_m_40 ltemp_40_50 ltemp_50_60 ltemp_60_70 ltemp_70_80 ltemp_80_p ym_* dow_*, cluster(Date) 
	qui eststo: areg log_minutes_logged api ltemp_m_40 ltemp_40_50 ltemp_50_60 ltemp_60_70 ltemp_70_80 ltemp_80_p ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "api ltemp_m_40 ltemp_40_50 ltemp_50_60 ltemp_60_70 ltemp_70_80 ltemp_80_p"

restore

************************************************************
**   Experiment with more leads and lags... , FP
************************************************************

foreach outcome in log_NumCall log_minutes_on_phone {

	preserve

		keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

		** Creating single local API measure **
		gen api_m0 = junjie_api_xx
		replace api_m0 = nantong_shuang_api_pm10 if nantong_prod_sample == 1

		** Create leads and lags... 
		foreach i in 1 2 3 4 {
			gen api_m`i' = junjie_api_xx_m`i'
			replace api_m`i' = nantong_shuang_api_pm10_m`i' if nantong_prod_sample == 1
		}
		foreach i in 1 2 3 4 {
			gen api_p`i' = junjie_api_xx_p`i'
			replace api_p`i' = nantong_shuang_api_pm10_p`i' if nantong_prod_sample == 1
		}

		** Creating single local temp measure **
		gen ltemp=tmax_fah
		replace ltemp=nantong_temp if nantong_prod_sample==1
		gen ltemp2 = ltemp*ltemp / 1000

		** Main effect with 3 leads and 3 lags
		qui cgmreg `outcome' api_p3 api_p2 api_p1 api_m0 api_m1 api_m2 api_m3 hotel ltemp ltemp2 ym_* dow_* , cluster(Date EmployeeID) 

		matrix event_study = J(200, 4, 0)
		local row = 1 

		forvalues i = 1(1)3 {
			matrix event_study[`row', 1] = +`i' 
			matrix event_study[`row', 2] = _b[api_p`i'] - 1.98 * _se[api_p`i']
			matrix event_study[`row', 3] = _b[api_p`i'] 
			matrix event_study[`row', 4] = _b[api_p`i'] + 1.98 * _se[api_p`i']
			local row = `row' + 1
		}

		forvalues i = 0(1)3 {
			matrix event_study[`row', 1] = -`i'
			matrix event_study[`row', 2] = _b[api_m`i'] - 1.98 * _se[api_m`i']
			matrix event_study[`row', 3] = _b[api_m`i'] 
			matrix event_study[`row', 4] = _b[api_m`i'] + 1.98 * _se[api_m`i']
			local row = `row' + 1
		}

		drop _all
		svmat event_study
		rename event_study1 time
		rename event_study2 b1
		rename event_study3 b2
		rename event_study4 b3
		drop if time == 0 & b1 == 0 & b2 ==0 & b3 == 0 
		list

		reshape long b , i(time) j(type)
		list

		gen lower_bound_tmp = b if type == 1
		gen upper_bound_tmp = b if type == 3
		egen lower_bound = max(lower_bound_tmp) , by(time)
		egen upper_bound = max(upper_bound_tmp) , by(time)

		disp "Input into the figure for outcome `outcome'"
		list time b lower_bound upper_bound if type == 2, clean

		format upper_bound lower_bound b %-4.3f

		local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
		graph set window fontface "Garamond" 
		tw ///
		(rcap lower_bound upper_bound time, ) ///
		(scatter b time if type == 2 , symbol(o) mcolor(blue) lcolor(blue) ) ///
		, ///
		scheme(s2mono) ylabel(, nogrid  angle(horizontal)  ) graphregion(fcolor(white)) ///
		yline(0) ///
		legend(off) ///
		aspectratio(`inv_golden_ratio') ///
		yscale( nofextend ) xscale(nofextend) ///
		xlabel(-3 `""3 days" "previous""' -2 `""2 days" "previous""' -1 `""Previous" "day""' 0 `""Contemporaneous" "effect""' 1 `""Next" "day""' 2 `""2 days" "after""' 3 `""3 days" "after""') ///
		xtitle(" ") ///
		ytitle(" ") ///
		title(" ")
		graph save ../gph/leadlags-`outcome'-3lags-3lead-mergedsample.gph , replace
	restore
}

************************************************************
**   Leads & Lags for Main Effect, merged sample, FP
************************************************************

foreach outcome in log_NumCall log_minutes_on_phone {

	preserve

		keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

		** Creating single local API measure **
		gen api_m0 = junjie_api_xx
		replace api_m0 = nantong_shuang_api_pm10 if nantong_prod_sample == 1

		** Create leads and lags... 
		foreach i in 1 2 3 4 {
			gen api_m`i' = junjie_api_xx_m`i'
			replace api_m`i' = nantong_shuang_api_pm10_m`i' if nantong_prod_sample == 1
		}
		foreach i in 1 2 {
			gen api_p`i' = junjie_api_xx_p`i'
			replace api_p`i' = nantong_shuang_api_pm10_p`i' if nantong_prod_sample == 1
		}

		** Creating single local temp measure **
		gen ltemp=tmax_fah
		replace ltemp=nantong_temp if nantong_prod_sample==1
		gen ltemp2 = ltemp*ltemp / 1000

		** Main effect with 1 lead and 3 lags
		qui cgmreg `outcome' api_p1 api_m0 api_m1 api_m2 api_m3 hotel ltemp ltemp2 ym_* dow_* , cluster(Date EmployeeID) 

		matrix event_study = J(200, 4, 0)
		local row = 1 

		forvalues i = 1(1)1 {
			matrix event_study[`row', 1] = +`i' 
			matrix event_study[`row', 2] = _b[api_p`i'] - 1.98 * _se[api_p`i']
			matrix event_study[`row', 3] = _b[api_p`i'] 
			matrix event_study[`row', 4] = _b[api_p`i'] + 1.98 * _se[api_p`i']
			local row = `row' + 1
		}

		forvalues i = 0(1)3 {
			matrix event_study[`row', 1] = -`i'
			matrix event_study[`row', 2] = _b[api_m`i'] - 1.98 * _se[api_m`i']
			matrix event_study[`row', 3] = _b[api_m`i'] 
			matrix event_study[`row', 4] = _b[api_m`i'] + 1.98 * _se[api_m`i']
			local row = `row' + 1
		}

		drop _all
		svmat event_study
		rename event_study1 time
		rename event_study2 b1
		rename event_study3 b2
		rename event_study4 b3
		drop if time == 0 & b1 == 0 & b2 ==0 & b3 == 0 
		list

		reshape long b , i(time) j(type)
		list

		gen lower_bound_tmp = b if type == 1
		gen upper_bound_tmp = b if type == 3
		egen lower_bound = max(lower_bound_tmp) , by(time)
		egen upper_bound = max(upper_bound_tmp) , by(time)

		disp "Input into the figure for outcome `outcome'"
		list time b lower_bound upper_bound if type == 2, clean

		format upper_bound lower_bound b %-4.3f

		local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
		graph set window fontface "Garamond" 
		tw ///
		(rcap lower_bound upper_bound time, ) ///
		(scatter b time if type == 2 , symbol(o) mcolor(blue) lcolor(blue) ) ///
		, ///
		scheme(s2mono) ylabel(, nogrid  angle(horizontal)  ) graphregion(fcolor(white)) ///
		yline(0) ///
		legend(off) ///
		aspectratio(`inv_golden_ratio') ///
		yscale( nofextend ) xscale(nofextend) ///
		xlabel(-3 `""Three days" "previous""' -2 `""Two days" "previous""' -1 `""One day" "before""' 0 `""Contemporaneous" "effect""' 1 `""Next" "day""') ///
		xtitle(" ") ///
		ytitle(" ") ///
		title(" ")
		graph save ../gph/leadlags-`outcome'-3lags-1lead-mergedsample.gph , replace
	restore
}

************************************************************
**   Create re-scaled histograms of our pollution measures, FP
************************************************************

codebook Date if shanghai_prod_sample == 1 
codebook Date if nantong_prod_sample == 1 


foreach bin in 5 10 25 50 {
	** First, the Shanghai histogram
	preserve

		keep if shanghai_prod_sample == 1

		bysort Date: keep if _n == 1

		** We re-normalize pollution here:
		replace junjie_api_xx = junjie_api_xx * 10

		replace junjie_api_xx = 200 if junjie_api_xx > 200 & junjie_api_xx < .

		local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
		graph set window fontface "Garamond" 
		histogram junjie_api_xx , ///
		fcolor(gs10) ///
		ylabel(, nogrid angle(horizontal)  ) ///
		width(`bin') ///
		start(0) ///
		frequency ///
		scheme(s2mono) ///
		xscale( range(0 200 ) ) ///
		xlabel(0 "0" 20 "20" 40 "40" 60 "60" 80 "80" 100 "100" 120 "120" 140 "140" 160 "160" 180 "180" 200 "≥200") ///
		graphregion(fcolor(white)) ///
		aspectratio(`inv_golden_ratio') ///
		legend(off) ///
		yscale( nofextend ) xscale(nofextend) ///
		xtitle("API") ytitle(" ") 
		graph save ../gph/histogram-shanghai`bin'.gph , replace

	restore

	** Second, the Nantong histogram
	preserve

		keep if nantong_prod_sample == 1

		bysort Date: keep if _n == 1

		** We re-normalize pollution here:
		replace nantong_shuang_api_pm10 = nantong_shuang_api_pm10 * 10

		replace nantong_shuang_api_pm10 = 200 if nantong_shuang_api_pm10 > 200 & nantong_shuang_api_pm10 < .

		local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
		graph set window fontface "Garamond" 
		histogram nantong_shuang_api_pm10 , ///
		fcolor(gs10) ///
		ylabel(, nogrid angle(horizontal)  ) ///
		width(`bin') ///
		start(0) ///
		frequency ///
		scheme(s2mono) ///
		xscale( range(0 200 ) ) ///
		xlabel(0 "0" 20 "20" 40 "40" 60 "60" 80 "80" 100 "100" 120 "120" 140 "140" 160 "160" 180 "180" 200 "≥200") ///
		graphregion(fcolor(white)) ///
		aspectratio(`inv_golden_ratio') ///
		legend(off) ///
		yscale( nofextend ) xscale(nofextend) ///
		xtitle("API") ytitle(" ") 
		graph save ../gph/histogram-nantong`bin'.gph , replace

	restore
}

************************************************************
**   Beijing Check, FP
************************************************************

** Bring in Beijing data
preserve
	insheet using ../src/beijing10-12.csv , comma clear
	d, f
	list in 1/10
	tab city

	rename aqi beijing_aqi
	sum beijing_aqi
	rename primarypollutant beijing_primarypollutant

	codebook date
	rename date date_orig
	gen Date = date(date_orig, "MD20Y")
	format Date %td
	list Date date_orig in 1/20

	bysort Date: gen N = _N
	tab N
	list if N == 2
	duplicates drop
	isid Date

	** Note: The other API's are divided by 10
	sum beijing_aqi
	replace beijing_aqi = beijing_aqi / 10

	keep Date beijing_primarypollutant beijing_aqi 

	tempfile beijing
	save `beijing'
restore

** Now merge in the Beijing data and run our falsification check...
preserve
	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

	merge m:1 Date using `beijing'
	tab Date _merge
	drop if _merge == 2
	drop _merge

	** Just briefly summarize Beijing AQI
	bysort Date: gen first_ob = (_n == 1)
	sum beijing_aqi if first_ob == 1
	tab beijing_primarypollutant if first_ob == 1
	drop first_ob

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	** Create other-city pollution
	gen other_api = junjie_api_xx
	replace other_api = nantong_shuang_api_pm10 if shanghai_prod_sample == 1

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1
	gen ltemp2 = ltemp*ltemp / 1000

	** RESULTS: Beijing falsification checks, original SE's, merged sample
	qui eststo: reg log_NumCall api other_api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_NumCall api beijing_aqi ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	qui eststo: reg log_minutes_logged api other_api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_minutes_logged api beijing_aqi ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "api other_api beijing_aqi ltemp ltemp2"

	take_out_worker_fe "log_waiting_ratio log_minutes_on_phone log_waiting_time log_minutes_logged log_ph_min_per_call log_NumCall api other_api beijing_aqi"

	** RESULTS: Beijing falsification checks, multi-way clustering, merged sample
	qui eststo: cgmreg log_NumCall api other_api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_NumCall api beijing_aqi hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 

	qui eststo: cgmreg log_minutes_logged api other_api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg log_minutes_logged api beijing_aqi hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 

	tabresults "*api* *aqi* *temp* " 

	** TABLE 6 just for R2: Beijing falsification checks, multi-way clustering, merged sample
	qui eststo: areg log_NumCall api other_api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)
	qui eststo: areg log_NumCall api beijing_aqi ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)
	qui eststo: areg log_minutes_logged api other_api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)
	qui eststo: areg log_minutes_logged api beijing_aqi ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "*api* *aqi* *temp* " 

	** TABLE 6: Beijing falsification checks, multi-way clustering, merged sample
	qui eststo: cgmreg r_log_NumCall r_api r_other_api r_ltemp r_ltemp2 r_ym_* r_dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_api r_beijing_aqi r_ltemp r_ltemp2 r_ym_* r_dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_api r_other_api r_ltemp r_ltemp2 r_ym_* r_dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_logged r_api r_beijing_aqi r_ltemp r_ltemp2 r_ym_* r_dow_*, cluster(Date EmployeeID) 

	tabresults "*api* *aqi* *temp* " 
restore

************************************************************
**   Graph histograms of dependent variable, FP
************************************************************

foreach bin in 5 {
	** First, the across-worker histogram
	preserve
		keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

		tab employee_type , sort
		label list employee_type
		*keep if employee_type >= 3 & employee_type <= 7 // "call"
		*keep if employee_type >= 8 & employee_type <= 13 // "notification"
		*keep if employee_type >= 14 & employee_type <= 17 // "order"
		sum NumCall if employee_type >= 3 & employee_type <= 7 // "call"
		sum NumCall if employee_type >= 8 & employee_type <= 13 // "notification"
		sum NumCall if employee_type >= 14 & employee_type <= 17 // "order"

		collapse (mean) NumCall , by(EmployeeID) fast
		sum NumCall
		codebook EmployeeID

		local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
		graph set window fontface "Garamond" 
		histogram NumCall , ///
		fcolor(gs10) ///
		ylabel(, nogrid angle(horizontal)  ) ///
		width(`bin') ///
		start(0) ///
		frequency ///
		scheme(s2mono) ///
		graphregion(fcolor(white)) ///
		aspectratio(`inv_golden_ratio') ///
		legend(off) ///
		yscale( nofextend ) xscale(nofextend) ///
		xtitle("Mean calls per day") ytitle(" ") 
		graph save ../gph/histogram-across-worker-`bin'.gph , replace

	restore

	** Second, the across-day histogram
	preserve

		keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

		collapse (mean) NumCall , by(Date) fast
		codebook Date
		sum NumCall

		local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
		graph set window fontface "Garamond" 
		histogram NumCall , ///
		fcolor(gs10) ///
		ylabel(, nogrid angle(horizontal)  ) ///
		width(`bin') ///
		start(0) ///
		frequency ///
		scheme(s2mono) ///
		graphregion(fcolor(white)) ///
		aspectratio(`inv_golden_ratio') ///
		legend(off) ///
		yscale( nofextend ) xscale(nofextend) ///
		xtitle("Mean calls per day") ytitle(" ") 
		graph save ../gph/histogram-across-day-`bin'.gph , replace

	restore
}


************************************************************
**   Productivity non-linear graph
************************************************************

** Here we take the "non-linear" productivity specification and calculate
** the marginal effects

preserve
	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	gen byte api_0_5 = (api > 0 & api <= 5 )
	gen byte api_5_10 = (api > 5 & api <= 10 )
	gen byte api_10_15 = (api > 10 & api <= 15 )
	gen byte api_15_20 = (api > 15 & api <= 20 )
	gen byte api_20_p = (api > 20 & api < . )

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1
	gen ltemp2 = ltemp*ltemp / 1000

	tempfile working 
	save `working'

	foreach outcome in NumCall minutes_logged {
		use `working' , clear

		** RESULTS: Non-Linear productivity, multi-way fixed effects, Merged Sample
		qui eststo: cgmreg log_`outcome' api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 

		sum api if api_0_5 == 1
		local baseline = r(mean)
		disp "Baseline is `baseline'"
		
		matrix api_numbers = J(10,10,.)
		local row = 1
		foreach apivar in api_5_10 api_10_15 api_15_20 api_20_p {
			sum api if `apivar' == 1
			local change = r(mean) - `baseline'
			disp "Change is `change'"
			matrix api_numbers[`row', 1] = _b[`apivar'] / `change'
			matrix api_numbers[`row', 2] = _b[`apivar'] / `change' - 1.98 * _se[`apivar'] / `change'
			matrix api_numbers[`row', 3] = _b[`apivar'] / `change' + 1.98 * _se[`apivar'] / `change'
			local row = `row' + 1
		}

		drop _all
		svmat api_numbers
		rename api_numbers1 beta
		rename api_numbers2 lower
		rename api_numbers3 upper
		gen time = _n
		list
		drop if beta == .

		local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
		graph set window fontface "Garamond" 
		tw ///
		(rcap lower upper time, ) ///
		(scatter beta time , symbol(o) mcolor(blue) lcolor(blue) ) ///
		, ///
		scheme(s2mono) ylabel(, nogrid  angle(horizontal)  ) graphregion(fcolor(white)) ///
		yline(0) ///
		legend(off) ///
		aspectratio(`inv_golden_ratio') ///
		yscale( nofextend ) xscale(nofextend) ///
		xlabel(1 "API 50–100" 2 "API 100–150" 3 "API 150–200" 4 "API 200+    ") ///
		xtitle(" ") ///
		ytitle(" ") ///
		title(" ")
		graph save ../gph/merged-non-linear-translation-`outcome'.gph , replace
	}
	
restore

************************************************************
**   Stratify basic, overall results by employee type
************************************************************

preserve
	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	gen byte api_0_5 = (api > 0 & api <= 5 )
	gen byte api_5_10 = (api > 5 & api <= 10 )
	gen byte api_10_15 = (api > 10 & api <= 15 )
	gen byte api_15_20 = (api > 15 & api <= 20 )
	gen byte api_20_p = (api > 20 & api < . )

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1
	gen ltemp2 = ltemp*ltemp / 1000

	tab employee_type if shanghai_prod_sample == 1, miss
	tab employee_type if nantong_prod_sample == 1, miss

	** Describe variation in employee type by worker
	tempfile working
	save `working'
	gen byte worker_other = employee_type == 1 | employee_type == 2 | employee_type == 18
	gen byte worker_call = employee_type >= 3 & employee_type <= 7 // "call"
	gen byte worker_notify = employee_type >= 8 & employee_type <= 13 // "notification"
	gen byte worker_order = employee_type >= 14 & employee_type <= 17 // "order"
	collapse (mean) worker_call worker_notify worker_order worker_other , by(EmployeeID) fast
	summarize
	list
	use `working' , clear

	** RESULTS: Linear productivity, original SE's, merged sample, by employee type
	qui eststo: reg log_NumCall api hotel ltemp ltemp2 ym_* dow_* , cluster(Date) 
	qui eststo: areg log_NumCall api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	qui eststo: reg log_NumCall api hotel ltemp ltemp2 ym_* dow_* if employee_type >= 3 & employee_type <= 7 , cluster(Date) 
	qui eststo: areg log_NumCall api ltemp ltemp2 ym_* dow_* if employee_type >= 3 & employee_type <= 7 , cluster(Date) a(EmployeeID)

	qui eststo: reg log_NumCall api hotel ltemp ltemp2 ym_* dow_* if employee_type >= 8 & employee_type <= 13 , cluster(Date) 
	qui eststo: areg log_NumCall api ltemp ltemp2 ym_* dow_* if employee_type >= 8 & employee_type <= 13 , cluster(Date) a(EmployeeID)

	qui eststo: reg log_NumCall api hotel ltemp ltemp2 ym_* dow_* if employee_type >= 14 & employee_type <= 17 , cluster(Date) 
	qui eststo: areg log_NumCall api ltemp ltemp2 ym_* dow_* if employee_type >= 14 & employee_type <= 17 , cluster(Date) a(EmployeeID)

	tabresults "api ltemp ltemp2"

restore

************************************************************
**   Graph main effect as a scatter plot
************************************************************

preserve
	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	gen other_api = junjie_api_xx
	replace other_api = nantong_shuang_api_pm10 if shanghai_prod_sample == 1

	gen byte api_0_5 = (api > 0 & api <= 5 )
	gen byte api_5_10 = (api > 5 & api <= 10 )
	gen byte api_10_15 = (api > 10 & api <= 15 )
	gen byte api_15_20 = (api > 15 & api <= 20 )
	gen byte api_20_p = (api > 20 & api < . )

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1
	gen ltemp2 = ltemp*ltemp / 1000

	tempfile preloop
	save `preloop'

	foreach outcome in NumCall minutes_on_phone {

		use `preloop' , clear

		collapse (mean) `outcome' (first) api , by(Date) fast

		twoway ///
		(scatter `outcome' api , mcolor(blue) msymbol(o)  ) ///
		(lfit `outcome' api , sort lpattern(dash) mcolor(blue) msymbol(i)  ) ///
		, ///
		ylabel(, nogrid angle(horizontal)  ) ///
		scheme(s2mono)  ///
		aspectratio(`inv_golden_ratio') ///
		graphregion(fcolor(white))  ///
		legend(off) ///
		yscale( nofextend ) xscale(nofextend) ///
		xtitle(" ") ytitle(" ") 

		graph save ../gph/main-scatter-`outcome'-mergedsample.gph , replace

	}

restore

************************************************************
**   Demand regressions / Falsification checks, FP
************************************************************

** NOTE: We append these two columns to the "work-from-home" table

preserve
	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	gen other_api = junjie_api_xx
	replace other_api = nantong_shuang_api_pm10 if shanghai_prod_sample == 1

	gen byte api_0_5 = (api > 0 & api <= 5 )
	gen byte api_5_10 = (api > 5 & api <= 10 )
	gen byte api_10_15 = (api > 10 & api <= 15 )
	gen byte api_15_20 = (api > 15 & api <= 20 )
	gen byte api_20_p = (api > 20 & api < . )

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1
	gen ltemp2 = ltemp*ltemp / 1000

	** Tabulation of waiting ratio
	sum LogInLengthsecond CallLengthsecond waiting_ratio 
	centile waiting_ratio , c(1 5 10 20 30 40 50 60 70 80 90 95 99)
	sum LogInLengthsecond CallLengthsecond waiting_ratio if waiting_ratio > 0 & waiting_ratio < 1
	sum LogInLengthsecond CallLengthsecond waiting_ratio if waiting_ratio > 0 & waiting_ratio < 1 & hotel == 1
	sum LogInLengthsecond CallLengthsecond waiting_ratio if waiting_ratio > 0 & waiting_ratio < 1 & hotel == 0
	centile waiting_ratio if waiting_ratio > 0 & waiting_ratio < 1, c(1 5 10 20 30 40 50 60 70 80 90 95 99)
	centile waiting_ratio if waiting_ratio > 0 & waiting_ratio < 1 & hotel == 1, c(1 5 10 20 30 40 50 60 70 80 90 95 99)
	centile waiting_ratio if waiting_ratio > 0 & waiting_ratio < 1 & hotel == 0, c(1 5 10 20 30 40 50 60 70 80 90 95 99)
	centile waiting_ratio if waiting_ratio > 0 & waiting_ratio < 1 & shanghai_prod_sample == 1, c(1 5 10 20 30 40 50 60 70 80 90 95 99)
	centile waiting_ratio if waiting_ratio > 0 & waiting_ratio < 1 & nantong_prod_sample == 1, c(1 5 10 20 30 40 50 60 70 80 90 95 99)
	compare LogInLengthsecond CallLengthsecond
	compare LogInLengthsecond CallLengthsecond if shanghai_prod_sample == 1
	compare LogInLengthsecond CallLengthsecond if nantong_prod_sample == 1
	table day_of_week if waiting_ratio > 0 & waiting_ratio < 1, c(mean waiting_ratio)

	** RESULTS: falsification checks, original SE's, merged sample
	qui eststo: reg log_NumCall api other_api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_NumCall api other_api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	qui eststo: reg log_waiting_time api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_waiting_time api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "api other_api ltemp ltemp2"

	take_out_worker_fe "log_waiting_ratio log_minutes_on_phone log_waiting_time log_ph_min_per_call log_NumCall api_5_10 api_10_15 api_15_20 api_20_p api other_api"

	** RESULTS: falsification checks, multi-way clustering, merged sample
	qui eststo: cgmreg log_NumCall api other_api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_api r_other_api r_ltemp r_ltemp2 r_ym_* r_dow_*, cluster(Date EmployeeID) 

	qui eststo: cgmreg log_waiting_time api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_waiting_time r_api r_ltemp r_ltemp2 r_ym_* r_dow_*, cluster(Date EmployeeID) 

	qui eststo: cgmreg log_waiting_ratio api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_waiting_ratio r_api r_ltemp r_ltemp2 r_ym_* r_dow_*, cluster(Date EmployeeID) 

	tabresults "*api* *temp* " 

	** RESULTS: falsification checks, multi-way clustering, nantong
	qui eststo: cgmreg log_NumCall api other_api hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_api r_other_api r_ltemp r_ltemp2 r_ym_* r_dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 

	qui eststo: cgmreg log_waiting_time api hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_waiting_time r_api r_ltemp r_ltemp2 r_ym_* r_dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 

	tabresults "*api* *temp* " 

	** RESULTS: falsification checks, multi-way clustering, shanghai
	qui eststo: cgmreg log_NumCall api other_api hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_NumCall r_api r_other_api r_ltemp r_ltemp2 r_ym_* r_dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 

	qui eststo: cgmreg log_waiting_time api hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_waiting_time r_api r_ltemp r_ltemp2 r_ym_* r_dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 

	tabresults "*api* *temp* " 

restore

************************************************************
**   Mechanism Table, FP
************************************************************

preserve
	keep if (shanghai_prod_sample == 1 | nantong_prod_sample == 1)

	** Creating single local API measure **
	gen api=junjie_api_xx
	replace api=nantong_shuang_api_pm10 if  nantong_prod_sample==1

	gen byte api_0_5 = (api > 0 & api <= 5 )
	gen byte api_5_10 = (api > 5 & api <= 10 )
	gen byte api_10_15 = (api > 10 & api <= 15 )
	gen byte api_15_20 = (api > 15 & api <= 20 )
	gen byte api_20_p = (api > 20 & api < . )

	** Creating single local temp measure **
	gen ltemp=tmax_fah
	replace ltemp=nantong_temp if nantong_prod_sample==1
	gen ltemp2 = ltemp*ltemp / 1000

	** RESULTS: mechanism outcomes, original SE's, merged sample
	qui eststo: reg log_minutes_on_phone api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_minutes_on_phone api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	qui eststo: reg log_ph_min_per_call api hotel ltemp ltemp2 ym_* dow_*, cluster(Date) 
	qui eststo: areg log_ph_min_per_call api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "api ltemp ltemp2"

	take_out_worker_fe "log_minutes_on_phone log_waiting_time log_ph_min_per_call log_NumCall api_5_10 api_10_15 api_15_20 api_20_p api "

	** TABLE 4A just for R2: mechanism outcomes, linear, multi-way clustering, merged sample
	qui eststo: cgmreg log_minutes_on_phone api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: areg log_minutes_on_phone api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	qui eststo: cgmreg log_ph_min_per_call api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: areg log_ph_min_per_call api ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "*api* *temp* " 

	** TABLE 4A: mechanism outcomes, linear, multi-way clustering, merged sample
	qui eststo: cgmreg log_minutes_on_phone api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_on_phone r_api r_ltemp r_ltemp2 r_ym_* r_dow_*, cluster(Date EmployeeID) 

	qui eststo: cgmreg log_ph_min_per_call api hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_ph_min_per_call r_api r_ltemp r_ltemp2 r_ym_* r_dow_*, cluster(Date EmployeeID) 

	tabresults "*api* *temp* " 

	** TABLE 4B just for R2: mechanism outcomes, non-linear, multi-way clustering, merged sample
	qui eststo: cgmreg log_minutes_on_phone api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: areg log_minutes_on_phone api_5_10 api_10_15 api_15_20 api_20_p ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	qui eststo: cgmreg log_ph_min_per_call api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: areg log_ph_min_per_call api_5_10 api_10_15 api_15_20 api_20_p ltemp ltemp2 ym_* dow_*, cluster(Date) a(EmployeeID)

	tabresults "*api* *temp* " 

	** TABLE 4B: mechanism outcomes, non-linear, multi-way clustering, merged sample
	qui eststo: cgmreg log_minutes_on_phone api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_on_phone r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_ltemp r_ltemp2 r_ym_* r_dow_*, cluster(Date EmployeeID) 

	qui eststo: cgmreg log_ph_min_per_call api_5_10 api_10_15 api_15_20 api_20_p hotel ltemp ltemp2 ym_* dow_*, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_ph_min_per_call r_api_5_10 r_api_10_15 r_api_15_20 r_api_20_p r_ltemp r_ltemp2 r_ym_* r_dow_*, cluster(Date EmployeeID) 

	tabresults "*api* *temp* " 

	** RESULTS: mechanism outcomes, linear, multi-way clustering, nantong
	qui eststo: cgmreg log_minutes_on_phone api hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_on_phone r_api r_ltemp r_ltemp2 r_ym_* r_dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 

	qui eststo: cgmreg log_ph_min_per_call api hotel ltemp ltemp2 ym_* dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_ph_min_per_call r_api r_ltemp r_ltemp2 r_ym_* r_dow_* if nantong_prod_sample == 1, cluster(Date EmployeeID) 

	tabresults "*api* *temp* " 

	** RESULTS: mechanism outcomes, linear, multi-way clustering, shanghai
	qui eststo: cgmreg log_minutes_on_phone api hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_minutes_on_phone r_api r_ltemp r_ltemp2 r_ym_* r_dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 

	qui eststo: cgmreg log_ph_min_per_call api hotel ltemp ltemp2 ym_* dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 
	qui eststo: cgmreg r_log_ph_min_per_call r_api r_ltemp r_ltemp2 r_ym_* r_dow_* if shanghai_prod_sample == 1, cluster(Date EmployeeID) 

	tabresults "*api* *temp* " 

restore

************************************************************
**   Summarize key covariates, TEMP
************************************************************

** The following just answers a question Josh had...

** First, Shanghai sample
preserve
	keep if shanghai_prod_sample == 1
	bysort Date: keep if _n == 1

	gen year_month = ym( year(Date) , month(Date) ) 
	format year_month %tm

	gen month = month(Date)
	tab month

	codebook Date
	table year_month , c(N tmax_fah mean tmax_fah)

restore

preserve
	keep if nantong_prod_sample == 1
	bysort Date: keep if _n == 1

	gen year_month = ym( year(Date) , month(Date) ) 
	format year_month %tm

	gen month = month(Date)
	tab month

	codebook Date
	table year_month , c(N nantong_temp mean nantong_temp)

restore

************************************************************
**   Lead & lags for Numcall separately by city
************************************************************

foreach city in shanghai nantong {

	preserve

		keep if `city'_prod_sample == 1

		** Creating single local API measure **
		gen api_m0 = junjie_api_xx
		replace api_m0 = nantong_shuang_api_pm10 if nantong_prod_sample == 1

		** Create leads and lags... 
		foreach i in 1 2 3 4 {
			gen api_m`i' = junjie_api_xx_m`i'
			replace api_m`i' = nantong_shuang_api_pm10_m`i' if nantong_prod_sample == 1
		}
		foreach i in 1 2 {
			gen api_p`i' = junjie_api_xx_p`i'
			replace api_p`i' = nantong_shuang_api_pm10_p`i' if nantong_prod_sample == 1
		}

		** Creating single local temp measure **
		gen ltemp=tmax_fah
		replace ltemp=nantong_temp if nantong_prod_sample==1
		gen ltemp2 = ltemp*ltemp / 1000

		** Main effect with 1 lead and 3 lags
		qui cgmreg log_NumCall api_p1 api_m0 api_m1 api_m2 api_m3 hotel ltemp ltemp2 ym_* dow_* , cluster(Date EmployeeID) 

		matrix event_study = J(200, 4, 0)
		local row = 1 

		forvalues i = 1(1)1 {
			matrix event_study[`row', 1] = +`i' 
			matrix event_study[`row', 2] = _b[api_p`i'] - 1.98 * _se[api_p`i']
			matrix event_study[`row', 3] = _b[api_p`i'] 
			matrix event_study[`row', 4] = _b[api_p`i'] + 1.98 * _se[api_p`i']
			local row = `row' + 1
		}

		forvalues i = 0(1)3 {
			matrix event_study[`row', 1] = -`i'
			matrix event_study[`row', 2] = _b[api_m`i'] - 1.98 * _se[api_m`i']
			matrix event_study[`row', 3] = _b[api_m`i'] 
			matrix event_study[`row', 4] = _b[api_m`i'] + 1.98 * _se[api_m`i']
			local row = `row' + 1
		}

		drop _all
		svmat event_study
		rename event_study1 time
		rename event_study2 b1
		rename event_study3 b2
		rename event_study4 b3
		drop if time == 0 & b1 == 0 & b2 ==0 & b3 == 0 
		list

		reshape long b , i(time) j(type)
		list

		gen lower_bound_tmp = b if type == 1
		gen upper_bound_tmp = b if type == 3
		egen lower_bound = max(lower_bound_tmp) , by(time)
		egen upper_bound = max(upper_bound_tmp) , by(time)

		format upper_bound lower_bound b %-4.3f

		local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
		graph set window fontface "Garamond" 
		tw ///
		(rcap lower_bound upper_bound time, ) ///
		(scatter b time if type == 2 , symbol(o) mcolor(blue) lcolor(blue) ) ///
		, ///
		scheme(s2mono) ylabel(, nogrid  angle(horizontal)  ) graphregion(fcolor(white)) ///
		yline(0) ///
		legend(off) ///
		aspectratio(`inv_golden_ratio') ///
		yscale( nofextend ) xscale(nofextend) ///
		xlabel(-3 `""Three days" "previous""' -2 `""Two days" "previous""' -1 `""One day" "before""' 0 `""Contemporaneous" "effect""' 1 `""Next" "day""') ///
		xtitle(" ") ///
		ytitle(" ") ///
		title(" ")
		graph save ../gph/leadlags-Numcall-3lags-1lead-`city'.gph , replace
	restore
}


************************************************************
**   Create histograms of our pollution measures, old version
************************************************************

codebook Date if shanghai_prod_sample == 1 
codebook Date if nantong_prod_sample == 1 

foreach bin in 5 10 25 50 {
	** First, the Shanghai histogram
	preserve

		keep if shanghai_prod_sample == 1

		bysort Date: keep if _n == 1

		** We re-normalize pollution here:
		replace junjie_api_xx = junjie_api_xx * 10

		local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
		graph set window fontface "Garamond" 
		histogram junjie_api_xx , ///
		fcolor(gs10) ///
		ylabel(, nogrid angle(horizontal)  ) ///
		width(`bin') ///
		start(0) ///
		frequency ///
		scheme(s2mono) ///
		xscale( range(0 500 ) ) ///
		xlabel( 0(50)500) ///
		graphregion(fcolor(white)) ///
		aspectratio(`inv_golden_ratio') ///
		legend(off) ///
		yscale( nofextend ) xscale(nofextend) ///
		xtitle("API") ytitle(" ") 
		graph save ../gph/histogram-shanghai`bin'.gph , replace

	restore

	** Second, the Nantong histogram
	preserve

		keep if nantong_prod_sample == 1

		bysort Date: keep if _n == 1

		** We re-normalize pollution here:
		replace nantong_shuang_api_pm10 = nantong_shuang_api_pm10 * 10

		local inv_golden_ratio = 2 / ( sqrt(5) + 1 )
		graph set window fontface "Garamond" 
		histogram nantong_shuang_api_pm10 , ///
		fcolor(gs10) ///
		ylabel(, nogrid angle(horizontal)  ) ///
		width(`bin') ///
		start(0) ///
		frequency ///
		scheme(s2mono) ///
		xscale( range(0 500 ) ) ///
		graphregion(fcolor(white)) ///
		aspectratio(`inv_golden_ratio') ///
		legend(off) ///
		yscale( nofextend ) xscale(nofextend) ///
		xlabel( 0(50)500) ///
		xtitle("API") ytitle(" ") 
		graph save ../gph/histogram-nantong`bin'.gph , replace

	restore
}




log close
exit
